In [None]:
import pandas as pd

# Lift pandas' row and column display limits so printed frames are not truncated
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Load the yearly indicators CSV from the mounted Drive folder
source_csv = '/content/drive/MyDrive/quickview of price surge in nigeria/Yearly/1990_onwards.csv'
df = pd.read_csv(source_csv)

# Preview the first 5 rows as a left-aligned markdown table
preview_rows = df.head()
print(preview_rows.to_markdown(index=False, numalign="left", stralign="left"))

| Year   | Debt % GDP   | Unemployment Rate (%)   | Export Rate (NGN/USD)   | Annual Increase (%)   | Unemployment, male (% of male labor force)   | Unemployment, female (% of female labor force)   | Unemployment, total (% of total labor force)   |
|:-------|:-------------|:------------------------|:------------------------|:----------------------|:---------------------------------------------|:-------------------------------------------------|:-----------------------------------------------|
| 1990   | 71.7         | nan                     | nan                     | nan                   | nan                                          | nan                                              | nan                                            |
| 1991   | 75           | 3.83                    | nan                     | nan                   | nan                                          | nan                                              | nan                                            |
| 19

Given the large number of missing values in several columns, we will focus our analysis and visualizations on the columns with sufficient data: Debt % GDP, Unemployment Rate (%), and Year. We will create the following visualizations:

Line Chart: Debt % GDP over time (Year).
Histogram: Distribution of Unemployment Rate (%).
Scatterplot Matrix: Relationships between Debt % GDP, Unemployment Rate (%), and Year.

In [None]:
# Report column names, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that only appends a stray "None" line).
df.info()

# Keep only rows for 1990 and later; .copy() yields an independent frame so the
# 'Year' conversion below does not modify `df`.
df_filtered = df[df['Year'] >= 1990].copy()

# Parse the integer year with format '%Y' into datetime values so the charts
# built from this frame get a date-based x-axis.
df_filtered['Year'] = pd.to_datetime(df_filtered['Year'], format='%Y')

# Preview the converted frame as a left-aligned markdown table
print(df_filtered.head().to_markdown(index=False, numalign="left", stralign="left"))


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34 entries, 0 to 33
Data columns (total 8 columns):
 #   Column                                          Non-Null Count  Dtype  
---  ------                                          --------------  -----  
 0   Year                                            34 non-null     int64  
 1   Debt % GDP                                      34 non-null     float64
 2   Unemployment Rate (%)                           32 non-null     float64
 3   Export Rate (NGN/USD)                           29 non-null     float64
 4   Annual Increase (%)                             29 non-null     float64
 5   Unemployment, male (% of male labor force)      29 non-null     float64
 6   Unemployment, female (% of female labor force)  29 non-null     float64
 7   Unemployment, total (% of total labor force)    29 non-null     float64
dtypes: float64(7), int64(1)
memory usage: 2.2 KB
None
| Year                | Debt % GDP   | Unemployment Rate (%)   | Export R

In [None]:
import plotly.express as px

# Line chart: `Debt % GDP` (y-axis) plotted against `Year` (x-axis).
fig_line = px.line(
    df_filtered,
    x='Year',
    y='Debt % GDP',
    title='Debt % GDP Over Time',
)
fig_line.show()

# Histogram: distribution of `Unemployment Rate (%)`.
fig_hist = px.histogram(
    df_filtered,
    x='Unemployment Rate (%)',
    title='Distribution of Unemployment Rate (%)',
)
fig_hist.show()

# Scatterplot matrix: pairwise views of the three columns listed below.
matrix_dimensions = ['Debt % GDP', 'Unemployment Rate (%)', 'Year']
fig_scatter_matrix = px.scatter_matrix(
    df_filtered,
    dimensions=matrix_dimensions,
    title='Scatterplot Matrix of Economic Indicators',
)
fig_scatter_matrix.show()


**BAR CHARTS**

Since the Year column represents time, it will be used as the x-axis for the bar charts. The other columns will be plotted on the y-axis, one at a time.

We will create separate bar charts for each of these columns to avoid clutter and ensure clarity.

In [None]:
def _yearly_bar_chart(frame, column, label):
    """Build a bar chart of ``column`` against ``Year`` with one tick per year.

    Args:
        frame: DataFrame containing a ``Year`` column and ``column``. The tick
            settings below are date-based, so ``Year`` is expected to hold
            datetime values.
        column: Name of the column plotted on the y-axis.
        label: Short label used as the y-axis title and as the prefix of the
            chart title (``"<label> Over Time"``).

    Returns:
        The configured Plotly figure; it is not displayed here.
    """
    chart = px.bar(frame, x='Year', y=column, title=f'{label} Over Time')
    chart.update_xaxes(
        tickformat="%Y",  # Format x-axis labels as years
        dtick="M12",      # Show ticks for each year
        title_text='Year'
    )
    chart.update_yaxes(title_text=label)
    return chart


# Column to plot -> short label for the chart title and y-axis.
# One entry per chart, in display order.
_BAR_CHART_LABELS = {
    'Debt % GDP': 'Debt % GDP',
    'Unemployment Rate (%)': 'Unemployment Rate (%)',
    'Unemployment, male (% of male labor force)': 'Unemployment, Male (%)',
    'Unemployment, female (% of female labor force)': 'Unemployment, Female (%)',
    'Unemployment, total (% of total labor force)': 'Unemployment, Total (%)',
}

# Build every chart first (keeping the fig1..fig5 names), then display them.
fig1, fig2, fig3, fig4, fig5 = [
    _yearly_bar_chart(df_filtered, column, label)
    for column, label in _BAR_CHART_LABELS.items()
]

# Display all charts
for chart in (fig1, fig2, fig3, fig4, fig5):
    chart.show()


These columns appear to represent economic indicators related to Nigeria, primarily focusing on debt, unemployment, and export rates.

Based on the descriptive statistics, we can see the following:

The data spans from 1990 to 2023.
Debt % GDP ranges from 7.3% to 75%, with a mean of 34.09%.
Unemployment Rate (%) ranges from 3.51% to 5.63%, with a mean of 4.09%.
The unemployment columns (male, female, total) have similar ranges and means, suggesting a relatively balanced distribution of unemployment across genders.

In [None]:
# Keep only the numeric columns; non-numeric columns cannot be correlated.
numeric_df = df.select_dtypes(include='number')

# Pairwise correlation between every pair of numeric columns
correlation_matrix = numeric_df.corr()

# Draw the matrix as a heatmap. zmin/zmax pin the colour range to [-1, 1] so
# the diverging scale is centred on zero correlation rather than on whatever
# the smallest and largest observed coefficients happen to be.
fig = px.imshow(correlation_matrix,
                x=correlation_matrix.columns,
                y=correlation_matrix.index,
                color_continuous_scale='RdBu_r',  # blue = low, red = high
                zmin=-1,
                zmax=1,
                title='Correlation Heatmap of Economic Indicators')

# Write each coefficient onto its cell. Matrix rows map to the y-axis and
# matrix columns to the x-axis, so the value is looked up as [row, column]
# and placed at (x=column, y=row).
for row_name in correlation_matrix.index:
    for col_name in correlation_matrix.columns:
        fig.add_annotation(x=col_name,
                           y=row_name,
                           text=f"{correlation_matrix.loc[row_name, col_name]:.2f}",
                           showarrow=False)

# Display the heatmap
fig.show()


The heatmap displays the correlation coefficients between each pair of variables, with a color scale ranging from blue (lower, more negative correlations) to red (higher, more positive correlations). The value in each cell is the correlation coefficient between the corresponding row and column variables.

In [None]:
# Reshape the two indicators into long format (one row per year/indicator
# pair) so Plotly can draw one coloured line per indicator.
indicator_columns = ['Debt % GDP', 'Unemployment, total (% of total labor force)']
df_melted = pd.melt(
    df_filtered,
    id_vars='Year',
    value_vars=indicator_columns,
    var_name='Indicator',
    value_name='Percentage',
)

# Two-line chart, coloured by indicator
fig = px.line(
    df_melted,
    x='Year',
    y='Percentage',
    color='Indicator',
    title='Debt % GDP and Unemployment Rate Over Time',
)

# Label the shared y-axis, then render
fig.update_yaxes(title_text='Percentage')
fig.show()


In [None]:
# Keep only the rows where both plotted columns, `Export Rate (NGN/USD)` and
# `Debt % GDP`, are present. The result gets its own name instead of being
# assigned back to `df_filtered`, so code that uses `df_filtered` afterwards
# still sees every row rather than this reduced subset.
df_export_debt = df_filtered.dropna(subset=['Export Rate (NGN/USD)', 'Debt % GDP'])

# Melt the DataFrame to long format for Plotly
df_melted = df_export_debt.melt(id_vars='Year', value_vars=['Export Rate (NGN/USD)', 'Debt % GDP'],
                                var_name='Indicator', value_name='Value')

# Create a Plotly line plot with two lines, one per indicator
fig = px.line(df_melted, x='Year', y='Value', color='Indicator',
              title='Export Rate (NGN/USD) and Debt % GDP')

# Update y-axis label
fig.update_yaxes(title_text='Value')

# Display the plot
fig.show()


In [None]:
import numpy as np
import altair as alt

# Descriptive statistics for the numeric columns, rounded to 2 decimals
desc_stats = numeric_df.describe().round(2)

# Print descriptive statistics
print("Descriptive Statistics:")
print(desc_stats.to_markdown(numalign="left", stralign="left"))

# Count missing values per column and express them as a percentage of the
# number of rows in `df_filtered`
missing_values = df_filtered.isnull().sum()
missing_percent = (missing_values / len(df_filtered) * 100).round(2)

print("\nMissing Values:")
print(pd.DataFrame({'Missing Values': missing_values, 'Percentage': missing_percent}).to_markdown(numalign="left", stralign="left"))

# Calculate the correlation matrix
correlation_matrix = numeric_df.corr()

# Reshape the matrix to long form: reset_index() exposes the row labels as an
# 'index' column and melt() yields one ('index', 'variable', 'value') row per
# cell, which is the layout the Altair encodings below refer to.
correlation_long = correlation_matrix.reset_index().melt('index')

# Shared encodings for the heatmap layers. The colour domain is fixed to
# [-1, 1] so the diverging scheme is centred on zero correlation.
base = alt.Chart(correlation_long,
                 title='Correlation Heatmap of Economic Indicators').encode(
    x=alt.X('index:O', axis=alt.Axis(labelAngle=-45)),
    y=alt.Y('variable:O'),
    color=alt.Color('value:Q', scale=alt.Scale(scheme='redblue', reverse=True, domain=[-1, 1])),
    tooltip=['index:O', 'variable:O', 'value:Q']
)

# Text layer with the coefficient in each cell. Both ends of the diverging
# scale are dark, so labels switch to white for strong correlations of either
# sign, not only for strongly positive ones.
text = base.mark_text(baseline='middle').encode(
    text=alt.Text('value:Q', format='.2f'),
    color=alt.condition(
        (alt.datum.value > 0.5) | (alt.datum.value < -0.5),
        alt.value('white'),
        alt.value('black')
    )
)

# Layer the coloured cells underneath the labels
chart = base.mark_rect() + text

# Save the chart
chart.save('correlation_heatmap.json')

# Melt the DataFrame to long format for Plotly
df_melted = df_filtered.melt(id_vars='Year', value_vars=['Debt % GDP', 'Unemployment Rate (%)', 'Export Rate (NGN/USD)', 'Annual Increase (%)'],
                             var_name='Indicator', value_name='Value')

# Create a Plotly line plot with one line per indicator
fig = px.line(df_melted, x='Year', y='Value', color='Indicator',
              title='Economic Indicators Over Time')

# Update y-axis label
fig.update_yaxes(title_text='Value')

# Display the plot
fig.show()


Descriptive Statistics:
|       | Year    | Debt % GDP   | Unemployment Rate (%)   | Export Rate (NGN/USD)   | Annual Increase (%)   | Unemployment, male (% of male labor force)   | Unemployment, female (% of female labor force)   | Unemployment, total (% of total labor force)   |
|:------|:--------|:-------------|:------------------------|:------------------------|:----------------------|:---------------------------------------------|:-------------------------------------------------|:-----------------------------------------------|
| count | 34      | 34           | 32                      | 29                      | 29                    | 29                                           | 29                                               | 29                                             |
| mean  | 2006.5  | 34.09        | 4.09                    | 49.66                   | 0.05                  | 4.08                                         | 4.12                                        

Data Summary and Missing Values:

The dataset contains information on debt as a percentage of GDP, unemployment rates (total and by gender), export rates, and annual increases.
There are missing values in the dataset, particularly in the columns related to export rates, annual increases, and unemployment figures. These missing values range from 5.88% to 14.71% of the data, which could impact the reliability of analysis and conclusions drawn from these specific columns.
Correlation Analysis:

The correlation heatmap reveals moderate positive correlations between the three unemployment columns, indicating that they tend to move together. This suggests that changes in overall unemployment are generally reflected in both male and female unemployment rates.
There is a weak positive correlation between Debt % GDP and Annual Increase (%), suggesting a slight tendency for these two variables to increase together. However, this correlation is not strong enough to draw definitive conclusions.
Other correlations are weak, indicating that there aren't strong linear relationships between the remaining pairs of variables.
Trend Analysis:

The line plot titled 'Economic Indicators Over Time' shows the evolution of Debt % GDP, Unemployment Rate (%), Export Rate (NGN/USD), and Annual Increase (%) over the years.
We can observe fluctuations in these indicators, with periods of increase and decrease.
Notably, there are some missing data points in the Export Rate (NGN/USD) and Annual Increase (%) lines, which should be considered when interpreting the trends.
Key Considerations:

The missing values in the dataset, particularly in the export-related and annual increase columns, limit the scope of analysis for these variables.
While the correlation analysis provides some insights into potential relationships, further investigation is needed to understand the underlying factors driving these correlations.
The trend analysis reveals the dynamic nature of these economic indicators, but the missing data points warrant caution in interpreting the trends, especially for export rates and annual increases.
Overall, this analysis provides a preliminary overview of Nigeria's economic indicators from 1990 to 2023. However, addressing the missing data and conducting more in-depth analysis would be crucial for drawing more robust conclusions and understanding the complex interplay between these indicators.

In [None]:
import pandas as pd
import plotly.express as px

# Load the yearly dataset; the path is relative to the working directory
df_90s = pd.read_csv('1990_onwards.csv')

# Select the rows for 1990 and later into an independent copy
from_1990 = df_90s['Year'] >= 1990
df_90s_filtered = df_90s.loc[from_1990].copy()

# Parse the year values with format '%Y' into datetimes for a date-based axis
df_90s_filtered['Year'] = pd.to_datetime(df_90s_filtered['Year'], format='%Y')

# Every column except 'Year' gets its own chart
columns_to_plot = [name for name in df_90s_filtered.columns if name != 'Year']

# X-axis settings shared by all charts
year_axis_settings = dict(
    tickformat="%Y",  # Format x-axis labels as years
    dtick="M12",      # Show ticks for each year
    title_text='Year',
)

# Build and immediately display one bar chart per column
for column in columns_to_plot:
    fig = px.bar(df_90s_filtered, x='Year', y=column, title=f'{column} Over Time')
    fig.update_xaxes(**year_axis_settings)
    fig.update_yaxes(title_text=column)
    fig.show()
