# In Class data exploration Oct 22, 2024

In [2]:
# Import the libraries
import numpy as np                  # Numerical Python
import pandas as pd                 # Data Analysis
import matplotlib.pyplot as plt     # Plotting
import seaborn as sns               # Statistical Data Visualization

# Let's make sure pandas returns all the rows and columns for the dataframe
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Force pandas to display full numbers instead of scientific notation
#pd.options.display.float_format = '{:.0f}'.format

# Library to suppress warnings
import warnings
warnings.filterwarnings('ignore')

## Read the dataset

In [4]:
df = pd.read_csv('EnergyWorldDevelopmentIndicators_Data.csv')

In [7]:
df.head()

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Afghanistan,AFG,Access to electricity (% of population),EG.ELC.ACCS.ZS,4.4,9.3,14.1,19,23.8,28.7,33.5,38.4,42.4,48.3,42.7,43.2,69.1,68,89.5,71.5,97.7,97.7,93.4,97.7,97.7,97.7,85.3
1,Afghanistan,AFG,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,..,..,..,2.1,7.8,15.4,19.3,25,28,36.7,30.2,29.6,60.8,60.2,86.5,64.6,97.1,97.1,91.6,97.1,97.1,97.1,81.7
2,Afghanistan,AFG,"Access to electricity, urban (% of urban popul...",EG.ELC.ACCS.UR.ZS,73.4,74.8,76.1,77.5,78.8,74,81.6,83,89.9,85.9,82.8,86.6,95,92.2,98.7,92.5,99.5,99.5,98.8,99.5,99.5,99.5,95.9
3,Afghanistan,AFG,Alternative and nuclear energy (% of total ene...,EG.USE.COMM.CL.ZS,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..
4,Afghanistan,AFG,CO2 intensity (kg per kg of oil equivalent ene...,EN.ATM.CO2E.EG.ZS,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..


In [9]:
# Check the different names of the series
df['Series Name'].unique()

array(['Access to electricity (% of population)',
       'Access to electricity, rural (% of rural population)',
       'Access to electricity, urban (% of urban population)',
       'Alternative and nuclear energy (% of total energy use)',
       'CO2 intensity (kg per kg of oil equivalent energy use)',
       'Combustible renewables and waste (% of total energy)',
       'Energy imports, net (% of energy use)',
       'Energy related methane emissions (% of total)',
       'Energy use (kg of oil equivalent per capita)',
       'Energy use (kg of oil equivalent) per $1,000 GDP (constant 2021 PPP)',
       'Fossil fuel energy consumption (% of total)',
       'GDP per unit of energy use (constant 2021 PPP $ per kg of oil equivalent)',
       'GDP per unit of energy use (PPP $ per kg of oil equivalent)',
       'Investment in energy with private participation (current US$)',
       'Methane emissions in energy sector (thousand metric tons of CO2 equivalent)',
       'Nitrous oxide emiss

In [45]:
df_electricity = df.query('`Series Name` == "Access to electricity, rural (% of rural population)"')

df_electricity.head(266)

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
1,Afghanistan,AFG,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,..,..,..,2.1,7.8,15.4,19.3,25,28,36.7,30.2,29.6,60.8,60.2,86.5,64.6,97.1,97.1,91.6,97.1,97.1,97.1,81.7
20,Albania,ALB,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,100,100,100,100,100,100,100,100,100,100,100,100,99.9,99.8,99.9,100,99.7,99.7,100,100,100,100,100
39,Algeria,DZA,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,97.4,97.4,97.4,97.4,97.4,97.4,97.4,97.4,97.9,97.4,97.5,97.6,97.7,97.8,97.9,98.1,98.3,98.6,98.9,98.7,99.1,99.3,99.3
58,American Samoa,ASM,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..
77,Andorra,AND,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100
96,Angola,AGO,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,3.5,9.5,2.8,2.3,1.7,1,0.7,..,6.6,..,..,..,..,..,..,3.8,..,..,7.3,..,..,..,..
115,Antigua and Barbuda,ATG,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,96.6,96.7,100,97,97.1,100,100,100,100,98,98.2,92.7,100,100,100,100,100,100,100,100,100,100,100
134,Argentina,ARG,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,91.8,88.6,92.6,92.9,93.2,93.4,93.6,93.8,94,94.2,98.6,97.8,96.1,95.4,100,97.6,99.3,100,99.9,100,100,100,100
153,Armenia,ARM,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,98.5,100,95.8,98.9,98.9,99.6,98.9,98.9,98.9,99,100,99.3,99.2,99.4,99.8,100,99.8,99.5,99.9,100,100,100,100
172,Aruba,ABW,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100


In [37]:
df_electricity.info()

<class 'pandas.core.frame.DataFrame'>
Index: 266 entries, 1 to 5036
Data columns (total 27 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Country Name  266 non-null    object
 1   Country Code  266 non-null    object
 2   Series Name   266 non-null    object
 3   Series Code   266 non-null    object
 4   2000          266 non-null    object
 5   2001          266 non-null    object
 6   2002          266 non-null    object
 7   2003          266 non-null    object
 8   2004          266 non-null    object
 9   2005          266 non-null    object
 10  2006          266 non-null    object
 11  2007          266 non-null    object
 12  2008          266 non-null    object
 13  2009          266 non-null    object
 14  2010          266 non-null    object
 15  2011          266 non-null    object
 16  2012          266 non-null    object
 17  2013          266 non-null    object
 18  2014          266 non-null    object
 19  2015        

In [43]:
print(len(df_electricity['Country Name'].unique()))

266


In [47]:
sorted_electricity = df_electricity.sort_values(2000, ascending=False)

KeyError: 2000