### __Project__

<u>Research Question</u>: What are the major energy sources in the subregions of Africa and which subregion produces the highest amount of energy for consumption from 1980 till 2019

<u>Sources of dataset</u>:


    1. energy.csv
    2. https://www.worldometers.info/geography/how-many-countries-in-africa/

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display


In [2]:
# Load in energy sources dataset
energy_sources = pd.read_csv('energy.csv')
energy_sources.head(20)

Unnamed: 0.1,Unnamed: 0,Country,Energy_type,Year,Energy_consumption,Energy_production,GDP,Population,Energy_intensity_per_capita,Energy_intensity_by_GDP,CO2_emission
0,0,World,all_energy_types,1980,292.89979,296.337228,27770.910281,4298127.0,68.145921,10.547,4946.62713
1,1,World,coal,1980,78.656134,80.114194,27770.910281,4298127.0,68.145921,10.547,1409.790188
2,2,World,natural_gas,1980,53.865223,54.761046,27770.910281,4298127.0,68.145921,10.547,1081.593377
3,3,World,petroleum_n_other_liquids,1980,132.064019,133.111109,27770.910281,4298127.0,68.145921,10.547,2455.243565
4,4,World,nuclear,1980,7.5757,7.5757,27770.910281,4298127.0,68.145921,10.547,0.0
5,5,World,renewables_n_other,1980,20.702344,20.775178,27770.910281,4298127.0,68.145921,10.547,0.0
6,6,Afghanistan,all_energy_types,1980,0.026583,0.072561,,13356.5,1.990283,0.0,
7,7,Afghanistan,coal,1980,0.002479,0.002355,,13356.5,1.990283,0.0,
8,8,Afghanistan,natural_gas,1980,0.002094,0.06282,,13356.5,1.990283,0.0,
9,9,Afghanistan,petroleum_n_other_liquids,1980,0.014624,0.0,,13356.5,1.990283,0.0,


In [3]:
# Loads in data containing african country from web
df = pd.read_html('https://www.worldometers.info/geography/how-many-countries-in-africa/')
# The desired data is the first table on the webpage
african_countries = df[0]
african_countries.head()

Unnamed: 0,#,Country,Population (2024),Subregion
0,1,Nigeria,232679478,Western Africa
1,2,Ethiopia,132059767,Eastern Africa
2,3,Egypt,116538258,Northern Africa
3,4,DR Congo,109276265,Middle Africa
4,5,Tanzania,68560157,Eastern Africa


We have data containing energy consumption from various energy sources and we have another containing the african countries and where in Africa they can be found, now we must clean both DataFrames to use them for analysis

Beginning with the energy_sources dataframe

In [4]:
# Remove unnecessary columns
energy_sources = energy_sources[['Country','Energy_type','Year','Energy_consumption']]
# Replacing names of some countries to aid analysis
energy_sources['Country'] = energy_sources['Country'].replace({'Congo-Brazzaville':'Congo', 'Congo-Kinshasa':'DR Congo','Gambia, The':'Gambia',
                                                                'Sao Tome and Principe':'Sao Tome & Principe',"Côte d’Ivoire":"Côte d'Ivoire"})
# Converting energy values from quads to petajoules(PJ)
energy_sources['Energy_consumption'] = energy_sources['Energy_consumption']*1055.06
# Remove 'all_energy_types' column creating a new dataframe 'energy' which does not contain all_energy_types
# Still keeping the column that contains 'all_energy_types for future analysis                                                         
energy = energy_sources[energy_sources['Energy_type']!='all_energy_types']                                                                
# Rearange table 
energy = energy.pivot(index=['Country','Energy_type'],columns='Year',values='Energy_consumption')
# Filling up empty cells with 0, meaning zero quads of energy was produced in such cells
energy = energy.fillna(0)
energy.head()

Unnamed: 0_level_0,Year,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
Country,Energy_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,coal,2.615756,2.747642,3.187265,3.187265,3.253209,3.319152,3.516982,3.67085,3.033397,2.791605,...,15.936327,32.532047,27.25658,29.608559,33.354141,30.004219,37.32393,48.160618,53.743821,47.0176
Afghanistan,natural_gas,2.209296,3.313943,5.523239,5.523239,5.523239,0.0,0.0,24.302252,77.325347,75.116052,...,5.535562,6.296263,6.253352,6.027092,5.535562,5.703307,6.448404,6.448404,6.328606,5.316525
Afghanistan,nuclear,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Afghanistan,petroleum_n_other_liquids,15.429301,17.585308,17.145675,24.179798,24.246044,28.576125,29.889594,30.253964,28.881883,29.223704,...,90.201129,116.902254,103.760325,73.801616,58.329044,73.765861,54.35409,50.036873,63.871303,51.223345
Afghanistan,renewables_n_other,7.792536,7.951581,7.797929,8.280041,8.338224,8.487103,7.858085,8.310459,8.103975,8.254789,...,13.392242,14.187694,18.464668,21.922211,23.389119,23.770284,25.922947,27.174628,27.218007,25.949479


Now we clean the african_countries dataframe

In [5]:
# Remove unnecessay columns
african_countries = african_countries[['Country','Subregion']]
# we replace the Middle Africa with Central Africa for better understanding
african_countries['Subregion'] = african_countries['Subregion'].replace({'Middle Africa':'Central Africa'})
# Setting the index to Country which is the same as the index of the energy_sources
# dataframe so that we can merge both dataframes on their respective indices
african_countries = african_countries.set_index('Country')
african_countries.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  african_countries['Subregion'] = african_countries['Subregion'].replace({'Middle Africa':'Central Africa'})


Unnamed: 0_level_0,Subregion
Country,Unnamed: 1_level_1
Nigeria,Western Africa
Ethiopia,Eastern Africa
Egypt,Northern Africa
DR Congo,Central Africa
Tanzania,Eastern Africa


In [6]:
# Merge both dataframes
# Inner merge to eliminate countries that are not african 
full_df = pd.merge(african_countries,energy,how='inner',left_index=True,right_index=True)
# Resetting the index of the dataframe, making 'Energy_type' a column to ease future plotting
full_df.reset_index(level='Energy_type', inplace=True) 
full_df.head()

Unnamed: 0_level_0,Energy_type,Subregion,1980,1981,1982,1983,1984,1985,1986,1987,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Algeria,coal,Northern Africa,2.687658,8.076552,21.603198,35.042577,36.120742,36.127504,36.515849,37.010786,...,0.049118,0.057981,0.457282,1.328976,0.881321,0.608688,0.60689,0.975113,0.433015,0.427759
Algeria,natural_gas,Northern Africa,572.686568,430.759897,688.468852,816.700845,846.580144,727.062947,749.472422,816.700845,...,1140.393838,1301.046127,1418.330213,1303.736855,1547.366448,1567.784325,1573.600997,1633.231771,1773.004987,1857.578985
Algeria,nuclear,Northern Africa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Algeria,petroleum_n_other_liquids,Northern Africa,245.555547,257.027669,289.409108,275.242229,304.407703,333.933586,336.362194,342.065441,...,723.542262,726.038121,776.468685,833.742199,869.45368,906.56558,885.066623,849.064788,871.874692,920.787681
Algeria,renewables_n_other,Northern Africa,2.750471,4.053531,5.134435,2.521328,5.939241,6.909998,2.661028,5.217408,...,11.658671,17.553788,18.475065,10.96703,8.829881,6.312768,5.505907,7.93695,8.978949,10.259486


In [7]:
# Now splitting the full_df dataframe into North, West, Central, East and South parts of Africa
north = full_df[full_df['Subregion']=='Northern Africa']
west = full_df[full_df['Subregion']=='Western Africa']
central = full_df[full_df['Subregion']=='Central Africa']
east = full_df[full_df['Subregion']=='Eastern Africa']
south = full_df[full_df['Subregion']=='Southern Africa']

# Checking one the split dataframe
central.head()

Unnamed: 0_level_0,Energy_type,Subregion,1980,1981,1982,1983,1984,1985,1986,1987,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Angola,coal,Central Africa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Angola,natural_gas,Central Africa,10.16276,9.720901,11.046478,13.255774,13.255774,14.360422,15.465069,17.674365,...,28.633808,29.336,29.648085,15.91634,9.011457,30.155223,31.325532,31.715635,33.389549,33.583674
Angola,nuclear,Central Africa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Angola,petroleum_n_other_liquids,Central Africa,45.608246,45.483633,47.757815,50.031996,43.327833,56.854541,54.125523,54.342911,...,232.973163,243.80391,260.571508,303.338679,344.217657,340.930234,295.218972,259.114637,237.810596,272.471917
Angola,renewables_n_other,Central Africa,5.808782,5.845129,6.617761,6.65955,6.608896,6.613327,7.273963,7.255162,...,37.837676,40.778331,37.629736,47.653994,50.609582,50.114235,57.762701,75.759767,85.147419,86.878063


Before plotting, remember that we still have our energy_sources dataframe that contains the all_energy_types (which we removed while cleaning to produce the energy dataframe). We shall clean this dataframe once more so we can use it to plot the total amount of energy consumed by each subregion on the African continent


Cleaning the energy_sources dataframe once again

In [8]:
# Removing unnecessary rows
energy_sources= energy_sources[energy_sources['Energy_type']=='all_energy_types']
# Rearange table
energy_sources = energy_sources.pivot(index=['Country','Energy_type'],columns='Year',values='Energy_consumption')
# Filling up empty cells with 0, meaning zero quads of energy was produced
energy_sources = energy_sources.fillna(0)

In [9]:
# Merge both dataframes
# Inner merge to eliminate countries that are not african
total_energy = pd.merge(african_countries,energy_sources,how='inner',left_index=True,right_index=True)
# Resetting the index of the dataframe, making 'Energy_type' a column to ease future plotting
total_energy.reset_index(level='Energy_type', inplace=True)                    

In [10]:
# Preparing the merged dataframe for plotting
# Grouping the countries by the sum of the energy consumed by their subregions
grouped_total_energy = total_energy.groupby('Subregion').sum().reset_index()
# Transposing the dataframe
grouped_total_energy = grouped_total_energy.T
# Set the first row as column headers
grouped_total_energy.columns = grouped_total_energy.iloc[0]  
# Dropping unnecessary columns
grouped_total_energy.drop('Subregion',inplace=True)
grouped_total_energy.drop('Energy_type',inplace=True)
# Resetting index
grouped_total_energy = grouped_total_energy.reset_index()
# Checking dataframe
grouped_total_energy.head(10)

Subregion,index,Central Africa,Eastern Africa,Northern Africa,Southern Africa,Western Africa
0,1980,256.41678,756.306263,2419.485566,2796.484567,763.049222
1,1981,268.455872,642.556909,2381.858663,3019.08854,868.229582
2,1982,281.07944,636.515499,2780.707248,3224.176141,862.942026
3,1983,299.29762,665.772893,3100.993448,3321.87764,830.328588
4,1984,287.42942,618.057367,3265.638368,3585.404978,873.224343
5,1985,308.498499,640.449712,3315.400473,3658.545931,935.527433
6,1986,315.705473,651.836645,3413.579089,3712.01709,929.391327
7,1987,316.625437,655.892714,3541.963231,3849.23395,982.895995
8,1988,325.999845,695.08297,3680.97751,3936.160264,1048.687014
9,1989,351.215665,716.651107,3753.670158,3957.460538,1121.029914


Now lets proceed to making the interactive plot that would show us the amount of nergy consumed from the different energy sources from the 5 different subregions for all the years from 1980 to 2019

Making the plot..

In [11]:

def bar_plotting(dataframe,year,ax):
    sns.set()
    # Group data by 'Energy_type' and aggregate by sum
    grouped_df = dataframe.groupby('Energy_type')[year].sum().reset_index()

    # Define bar plot parameters
    sources = ['Coal', 'Natural gas', 'Nuclear', 'Petroleum', 'Renewables \nand Others']
    # Define positions
    pos = np.arange(len(sources))

    # Create bar plot with aggregated data
    bars = ax.bar(grouped_df['Energy_type'], grouped_df[year], align='center', linewidth=0, color=['saddlebrown', 'skyblue', 'red', 'yellow', 'green'])

    # Set yticks, xticks and labels
    plt.yticks([])
    plt.xticks(pos, sources, alpha=0.8,rotation=45)

    # Remove plot borders
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Loop over each bar to add height labels
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2, height + 2, f'{height:.2f} PJ', ha='center', color='black',fontweight='bold',fontsize=10)

def static_plot(ax):
    # Static plot 
    ax.plot(grouped_total_energy['index'],grouped_total_energy['Central Africa'],label='Central Africa')
    ax.plot(grouped_total_energy['index'],grouped_total_energy['Southern Africa'],label='South Africa')
    ax.plot(grouped_total_energy['index'],grouped_total_energy['Eastern Africa'],label='East Africa')
    ax.plot(grouped_total_energy['index'],grouped_total_energy['Western Africa'],label='West Africa')
    ax.plot(grouped_total_energy['index'],grouped_total_energy['Northern Africa'],label='North Afica')
    ax.set_title('Energy Consumption of African Subregions\nfrom 1980 to 2019', fontsize=16, fontweight='bold')
    ax.set_xlabel('Year', fontsize=12)
    ax.set_ylabel('Consumption (PJ)', fontsize=12)
    ax.legend(title="Subregion", loc='upper left')
    

def update_plot(change):
    year = y_slider.value
    with out:
        # clearing the output widget before plotting a new plot
        out.clear_output(wait=True)

        # Getting the size of the figure and creating the grid
        fig = plt.figure(figsize=(12,12))
        gs = gridspec.GridSpec(3,3,figure=fig)

        # Plotting the interactive North Africa on the (0,0) axis of the 3x3 grid
        ax1 = fig.add_subplot(gs[0, 0])
        bar_plotting(north, int(year), ax1)
        ax1.set_title('Energy Sources of North African\nCountries',fontweight='bold', fontsize=16)
        
        # Plotting interactive West Africa on the (1,0) axis of the 3x3 grid
        ax2 = fig.add_subplot(gs[1, 0])
        bar_plotting(west, int(year), ax2)
        ax2.set_title('Energy Sources of West African\nCountries',fontweight='bold', fontsize=16)

        # Plotting interactive Central Africa on the (2,0) axis of the 3x3 grid
        ax3 = fig.add_subplot(gs[2, 0])
        bar_plotting(central, int(year), ax3)
        ax3.set_title('Energy Sources of Central African\nCountries',fontweight='bold', fontsize=16)
        
        # Plotting interactive East Africa on the (2,1) axis of the 3x3 grid
        ax4 = fig.add_subplot(gs[2, 1])
        bar_plotting(east, int(year), ax4)
        ax4.set_title('Energy Sources of East African\nCountries',fontweight='bold', fontsize=16)

        # Plotting interactive South Africa on the (2,2) axis of the 3x3 grid
        ax5 = fig.add_subplot(gs[2, 2])
        bar_plotting(south, int(year), ax5)
        ax5.set_title('Energy Sources of South African\nCountries',fontweight='bold', fontsize=16)

        # Plotting the static plot on the (:2,1:) axis on the grid
        ax6 = fig.add_subplot(gs[:2, 1:])
        static_plot(ax6)  

        # Set the supertitle of the grid
        fig.suptitle('Energy Consumption and Major Energy Sources in African Subregions from 1980 till 2019\n in Petajoules(PJ)', fontweight='bold',fontsize=20)

        plt.tight_layout()
        plt.show()

# Main function to initialize and display all components
def main():
        # Make out and y_slider accessible in all other functions
        global out, y_slider

        # Set up slider(y_slider) and output widget(out)
        y_slider = widgets.FloatSlider(value=1980, min=1980, max=2019, step=1,
                                description='Year', orientation='vertical',
                                style={'description_width': 'initial'},
                                layout=widgets.Layout(width='70px', height='900px', padding='400px 0 100px 0'))
    
        out = widgets.Output()

        # Combines the slider and output widget in one horizontal layout side by side
        hbox = widgets.HBox([y_slider, out])
        # Display the HBox containing the slider and output widget
        display(hbox)
        
        # Initial plot
        update_plot(None)

        # Set the y_slider to update plot whenever the slider's value changes
        y_slider.observe(update_plot, names='value')
    # Run main function to start the widget and plot display
main()                        

HBox(children=(FloatSlider(value=1980.0, description='Year', layout=Layout(height='900px', padding='400px 0 10…