# Analysis_on_progress_towards_SDG7
### By Hafsah Anibaba

The data used here was extracted from the original data retrieved from the World Bank website through this <a href='https://api.worldbank.org/v2/country'>link</a>.<br>
After the data wrangling process (<a href= '../Data_wrangling_on_progress_towards_SDG7/Target7_1.ipynb'>here</a>), the data was extracted and cleaned for this goal's target.

In [None]:
# import all necessary libraries

%matplotlib inline
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

## Target 7.1: By 2030, ensure universal access to affordable, reliable and modern energy services
<b> 
    * Check trend in access to energy over the years
    * Check countries with high access and countries with low access
    * Check countries with significantly high change
    * Check countries with significantly low change

In [None]:
#read in the data available for all countries on access to electricity

data1 = pd.read_csv('../data/extracted_data/years_with_data_for_all_countries.csv', index_col = 'Country Name')

data1.head(2)

In [None]:
#read in the data of countries that had data for all the years

data2 = pd.read_csv('../data/extracted_data/countries_that_had_data_for_all_years.csv', index_col = 'Country Name')

data2.head(2)

### EDA

In [None]:
print(f'This data has records on {data1.shape[0]} countries for {data1.shape[1]} years')

In [None]:
data1T = data1.T
 
data1T.head()

In [None]:
data1T['mean per year'] = data1T.mean(axis = 1)

data1T.head()

In [None]:
data1T.loc['mean per country'] = data1T.mean(axis = 0)


data1T.tail()

In [None]:
# The last row of data1T is the 'mean per country' row added above. It was
# computed after the 'mean per year' column had been added, so it also holds an
# entry for that column (the overall yearly mean), which is not a country.
# Drop that entry so the per-country statistics, histogram and threshold
# filters below only see real countries.
average_access_per_country = data1T.iloc[-1].drop('mean per year', errors = 'ignore')
average_access_per_country

In [None]:
data1T = data1T.drop(data1T.index[-1])

In [None]:
data1T.index = pd.to_datetime(data1T.index)

In [None]:
# Line chart of the 'mean per year' column against the datetime index of data1T,
# with a written remark placed to the right of the visible x-range.
axis_font = {'color': 'green', 'weight': 'light', 'style': 'italic'}
remark = ('It can be seen there has been linear increase in the\n'
          'average global access to electricity')

sns.lineplot(x = data1T.index, y = data1T['mean per year'])
plt.xlim(pd.Timestamp('2008-01-01'), pd.Timestamp('2021-01-01'))
plt.title('Global trend in access to electricity from 2009 to 2020',
          pad = 7, fontdict = {'color': 'green', 'weight': 'bold'});
plt.xlabel('years', fontdict = axis_font)
plt.ylabel('average global access \n to electricity', fontdict = axis_font);
# x = 2022 lies beyond the upper x-limit (2021), so the remark sits outside the axes
plt.text(pd.Timestamp('2022-01-01'), 82, remark,
         fontdict = {'color': 'blue', 'weight': 'light', 'style': 'italic'});

In [None]:
full_access = average_access_per_country[average_access_per_country == 100].index.to_list()

In [None]:
percent = (len(full_access)/ data1.shape[0]) *100
print(f'{percent:.2f}% of the countries have average of 100% of their population with \
access to electricity from 2009 to 2020')

In [None]:
def world_cloud(data, title):
    """Show a word cloud of the items in ``data`` under the heading ``title``.

    Parameters
    ----------
    data : iterable of hashable items
        Items to draw; the notebook passes lists of country names. Each item
        is weighted by how many times it occurs in ``data``.
    title : str
        Heading drawn above the word cloud.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. When ``data`` is empty
        nothing is drawn and a short message is printed instead.
    """
    frequencies = Counter(data)
    if not frequencies:
        # WordCloud.generate_from_frequencies raises ValueError when it is
        # given no words, so an empty selection is reported instead of plotted.
        print(f'Nothing to plot for "{title}": no entries were supplied')
        return

    wordcloud = WordCloud(width = 1000, height = 500).generate_from_frequencies(frequencies)

    plt.figure(figsize=(15,8))
    plt.title(title, pad = 7,
         fontdict = {'color': 'green', 'weight': 'bold', 'fontsize': 32});

    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
    

In [None]:
world_cloud(full_access, 'Countries with average of 100% access to electricity')

In [None]:
# Histogram of the per-country average access, annotated with the mean,
# median and mode of that distribution.
plt.title('Distribution of frequency of average access to electricity', pad = 7,
         fontdict = {'color': 'green', 'weight': 'bold', 'fontsize': 18});
sns.histplot(data = average_access_per_country, bins = 10);
# Series.mode() returns a Series that holds several values when there is a tie;
# converting that whole array with int() fails for ties and truncates a
# fractional mode. Take the first (smallest) mode as it is instead.
dist = {'mean of distribution': average_access_per_country.mean(),
       'median of distribution': average_access_per_country.median(),
       'mode of distribution': average_access_per_country.mode().iloc[0]}

# Write one statistic per line, each 20 data units below the previous one
for row, (label, value) in enumerate(dist.items()):
    plt.text(120, 120 - (20 * row), f'{label} is {value:0.2f}')
    

<b> Most countries have an average access to electricity of between 90% and 100% from 2009 to 2020.

In [None]:
less_access = average_access_per_country[average_access_per_country < 50].index.to_list()

In [None]:
percent = (len(less_access)/ data1.shape[0]) *100
print(f'{percent:.2f}% of the countries have less than 50% average of their population with access to electricity')

In [None]:
world_cloud(less_access, 'countries that have less than 50% average of their population with access to electricity')

In [None]:
data_change = data1.copy()
data_change.head()

In [None]:
data_change['change'] = data_change['2020'] - data_change['2009']

In [None]:
positive_change = data_change[data_change['change'] > 0]
print(f'Number of countries with positive change is {len(positive_change)}')

In [None]:
negative_change = data_change[data_change['change'] < 0]
print(f'Number of countries with negative change is {len(negative_change)}')

In [None]:
no_change = data_change[data_change['change'] == 0]

print(f'Number of countries with no change is {len(no_change)}')

In [None]:
# Name of the first country whose 'change' equals the largest change in the data
change = data_change['change']
largest_rows = data_change[change == change.max()]
high = str(largest_rows.index[0])
print(f'The country with highest change is {high}')

In [None]:
# Name of the first country whose 'change' equals the smallest change in the data
low = str(data_change[data_change['change'] == data_change['change'].min()].index[0])
# This cell reports the minimum, so the message must say "lowest", not "highest"
print(f'The country with lowest change is {low}')

In [None]:
def plot_trends(data, observation, title):
    """Draw a line chart of ``data`` with a heading and a written observation.

    Parameters
    ----------
    data :
        Passed unchanged to ``sns.lineplot(data=...)``; the notebook supplies
        transposed frames (one column per country).
    observation : str
        Text anchored at data coordinates (0, -10), which is below the
        0-120 y-range set on the chart.
    title : str
        Heading of the chart.
    """
    heading_font = {'color': 'green', 'weight': 'bold', 'fontsize': 18}
    caption_font = {'color': 'green', 'weight': 'bold', 'fontsize': 20}

    plt.figure(figsize = (10,5))
    # lineplot returns the Axes it drew on; use it for the remaining styling
    ax = sns.lineplot(data = data)
    ax.set_ylim(0, 120)
    ax.legend(bbox_to_anchor = (1.5, 1))
    ax.set_title(title, pad = 7, fontdict = heading_font)
    ax.text(0, -10, f'{observation}', fontdict = caption_font,
            wrap = True, verticalalignment = 'top')

In [None]:
# Trend for the countries whose 2020 value is below their 2009 value.
# The last column of negative_change is the derived 'change' column, so it is
# dropped before transposing to leave only the yearly values.
plot_data = negative_change.iloc[:,:-1].T
observation = '''There were slight drop in the % access to electricity of the countries, except libya 
that had a linear drop in electricity but is increasing back,Turks also had sudden drop in 2012 but picked again
in 2013, Syrian Arab Republic might still be reducing'''
# Title spelling corrected ("negattive" -> "negative"); it is shown on the chart
title = 'Trend in access to electricity for countries with negative change'
plot_trends(plot_data, observation, title)

In [None]:
plot_data = no_change.iloc[:,:-1].T
observation = '''Countries with no change in access
from 2009 to 2020 had average of 100% of their
population with access to electricity'''
title = 'Trend in access to electricity for countries with no change'
plot_trends(plot_data, observation, title)

<b>Many countries had a positive change, so I want to check the trend in countries whose access changed by more than 30% from 2009 to 2020.

In [None]:
# Trend for the countries whose access rose by more than 30 between 2009 and 2020.
# The trailing 'change' column is dropped so only the yearly values are plotted.
greater_than_30 = positive_change[positive_change['change'] > 30]
plot_data = greater_than_30.iloc[:,:-1].T
# Typo in the displayed text corrected ("2017 t0 2018" -> "2017 to 2018")
observation = '''Afghanistan has had some sudden changes,other countries have been increasing
steadily But Cambodia had a sudden high increase from 2016 to 2017 and a sudden decrease from
2017 to 2018'''
title = 'Trend in access to electricity for countries with positive change greater than 30%'
plot_trends(plot_data, observation, title)

In [None]:
less_than_5 = positive_change[positive_change['change'] < 5]
length = len(less_than_5)
plot_data = less_than_5.iloc[:length // 2,:-1].T
observation = '''Many countries with low positive change 
already have 100% of their population with 
access to electricity, Equatorial Guinea is 
still between 65% and 66%'''
title = 'Trend in access to electricity for countries with positive change less than 5%'
plot_trends(plot_data, observation, title)

<b> Because many countries had less than 5% change I will divide them into two parts for visualisation

In [None]:
# Second half of the countries with a positive change below 5; the first half
# is plotted in the previous cell. The trailing 'change' column is dropped so
# only the yearly values are plotted.
less_than_5 = positive_change[positive_change['change'] < 5]
length = len(less_than_5)
plot_data = less_than_5.iloc[length // 2:,:-1].T
observation = '''South Sudan and Chad has made no progress and has very low percent of
access to electricity'''
# The data is filtered on change < 5, so the title must say "less than 5%"
title = 'Trend in access to electricity for countries with positive change less than 5%'
plot_trends(plot_data, observation, title)

In [None]:
data2T = data2.T

In [None]:
data2T['mean per year'] = data2T.mean(axis = 1)

data2T.head()

In [None]:
# Line chart of the 'mean per year' column of data2T against its index,
# with rotated tick labels and a written remark.
label_font = {'color': 'green', 'weight': 'light', 'style': 'italic'}
note = ('It can be seen there has been linear increase in the\n'
        ' average global access to electricity')

sns.lineplot(x = data2T.index, y = data2T['mean per year'])
plt.ylim(top = 100)
plt.title('Global trend in access to electricity from 1990 to 2020',
          pad = 7, fontdict = {'color': 'green', 'weight': 'bold'});
plt.xlabel('years', fontdict = label_font)
plt.ylabel('average global access \n to electricity', fontdict = label_font);
plt.xticks(rotation = 90);
plt.text(35, 97, note,
         fontdict = {'color': 'blue', 'weight': 'light', 'style': 'italic'});

### Conclusion
<b> From the analysis, there has been significant progress towards this target of SDG7. 
    * More focus can be directed to countries with a small positive change and a small % of their population with access to electricity
    * Countries with no change can maintain their strategies because they already have a high percentage of access to electricity
    * There can be more investigation into `Turks and Caicos Islands` and `Syrian Arab Republic` 
    to see why there might be a decrease in access to electricity
    * Afghanistan can be studied to see why it sometimes has sudden drops and how it manages 
    to get back on track immediately
    * Solomon Islands can also be studied to see how it was able to increase access to electricity; even though access there is still increasing, there should be more studies to see how it can be increased further
    

In [None]:
!jupyter nbconvert Target_7_1.ipynb --to slides --post serve --no-input --no-prompt