In [2]:
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sn
import numpy as np
import glob

# Read in owid total
owid_total = pd.read_csv('/Users/dorotheakirves/neuefische/capstone_project/data/raw-owid-energy-data.csv')

# GDP

In [3]:
# Read in GDP Data
gdp_ppp = pd.read_csv("/Users/dorotheakirves/neuefische/capstone_project/data/raw-gdp-ppp-current-international-dollar.csv", skiprows=4)


In [4]:
# Keep only country code, country name and GDP
gdp_ppp = gdp_ppp.drop(['Indicator Code', 'Indicator Name'], axis=1)
gdp_ppp.head()

Unnamed: 0,Country Name,Country Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,Unnamed: 65
0,Aruba,ABW,,,,,,,,,...,3442856000.0,3799467000.0,3816822000.0,3893071000.0,3941354000.0,4098240000.0,,,,
1,Africa Eastern and Southern,AFE,,,,,,,,,...,1772679000000.0,1893539000000.0,2025402000000.0,2098286000000.0,2212573000000.0,2319151000000.0,2438518000000.0,2536280000000.0,2495345000000.0,
2,Afghanistan,AFG,,,,,,,,,...,59667000000.0,65039840000.0,69058340000.0,71831700000.0,70097960000.0,74711920000.0,77415570000.0,81879800000.0,80918340000.0,
3,Africa Western and Central,AFW,,,,,,,,,...,1396677000000.0,1526772000000.0,1645122000000.0,1662297000000.0,1678674000000.0,1744087000000.0,1841811000000.0,1937451000000.0,1946297000000.0,
4,Angola,AGO,,,,,,,,,...,186124200000.0,199865600000.0,220364800000.0,204603600000.0,204874700000.0,217987300000.0,218748600000.0,221262800000.0,211837300000.0,


In [5]:
# Years in rows
gdp_ppp = gdp_ppp.melt(id_vars=["Country Name", "Country Code"], 
        var_name="Year", 
        value_name="GDP_PPP")
gdp_ppp.head()

Unnamed: 0,Country Name,Country Code,Year,GDP_PPP
0,Aruba,ABW,1960,
1,Africa Eastern and Southern,AFE,1960,
2,Afghanistan,AFG,1960,
3,Africa Western and Central,AFW,1960,
4,Angola,AGO,1960,


In [6]:
# Convert year into numeric, drop rows before 1990, reset index
gdp_ppp["Year"] = pd.to_numeric(gdp_ppp["Year"], errors='coerce')
gdp_ppp = gdp_ppp.drop(gdp_ppp[gdp_ppp.Year < 1990].index)
gdp_ppp = gdp_ppp.reset_index()
gdp_ppp = gdp_ppp.drop(['index'], axis=1)

In [7]:
gdp_ppp.head()

Unnamed: 0,Country Name,Country Code,Year,GDP_PPP
0,Aruba,ABW,1990.0,1447709000.0
1,Africa Eastern and Southern,AFE,1990.0,565349500000.0
2,Afghanistan,AFG,1990.0,
3,Africa Western and Central,AFW,1990.0,354456400000.0
4,Angola,AGO,1990.0,38853490000.0


In [8]:
# Store in csv again for tableau
gdp_ppp.to_csv('/Users/dorotheakirves/neuefische/capstone_project/data/gdp-ppp-current-international-dollar-clean.csv')

# Country Codes

In [9]:
# Read in country codes in Stata
country_codes = pd.read_stata('/Users/dorotheakirves/neuefische/capstone_project/data/location.dta')
country_codes.head()

Unnamed: 0,location_id,location_code,location_name_short_en,level,parent_id
0,0,ABW,Aruba,country,356.0
1,1,AFG,Afghanistan,country,353.0
2,2,AGO,Angola,country,352.0
3,3,AIA,Anguilla,country,356.0
4,4,ALB,Albania,country,355.0


In [10]:
# Store country codes in csv for Tableau
country_codes.to_csv('/Users/dorotheakirves/neuefische/capstone_project/data/location.csv')

In [11]:
# Create List for only African Countries
# Read in List of African Countries
africa_total = pd.read_csv('/Users/dorotheakirves/neuefische/capstone_project/data/countries_in_africa.csv')

In [12]:
# Merge country code list with only Africa list
country_codes_africa = country_codes.merge(africa_total, left_on='location_name_short_en', right_on='country') 
country_codes_africa

Unnamed: 0.1,location_id,location_code,location_name_short_en,level,parent_id,Unnamed: 0,country
0,2,AGO,Angola,country,352.0,1,Angola
1,17,BDI,Burundi,country,352.0,5,Burundi
2,19,BEN,Benin,country,352.0,2,Benin
3,21,BFA,Burkina Faso,country,352.0,4,Burkina Faso
4,37,BWA,Botswana,country,352.0,3,Botswana
5,38,CAF,Central African Republic,country,352.0,8,Central African Republic
6,45,CMR,Cameroon,country,352.0,7,Cameroon
7,50,COM,Comoros,country,352.0,10,Comoros
8,51,CPV,Cabo Verde,country,352.0,6,Cabo Verde
9,62,DJI,Djibouti,country,352.0,13,Djibouti


In [13]:
# Identify missing countries
africa_total['has_code'] = africa_total['country'].isin(country_codes_africa['country'])
africa_total

Unnamed: 0.1,Unnamed: 0,country,has_code
0,0,Algeria,True
1,1,Angola,True
2,2,Benin,True
3,3,Botswana,True
4,4,Burkina Faso,True
5,5,Burundi,True
6,6,Cabo Verde,True
7,7,Cameroon,True
8,8,Central African Republic,True
9,9,Chad,True


In [14]:
# Google the country Codes of the five missing countries and check for spelling in county code list
missing_list_codes = ['CIV', 'GMB', 'COD', 'COG', 'STP' ]
# Create list of country names
missing_list_names = []
for i in missing_list_codes:
    missing_list_names.append(country_codes.iloc[country_codes.index[country_codes['location_code']==i]]['location_name_short_en'].tolist()[0])
missing_list_names

["Côte d'Ivoire",
 'Gambia',
 'Congo (Democratic Republic of the)',
 'Congo',
 'Sao Tome and Principe']

In [15]:
# Identify index of missing countries in total Africa list
index_list = []
index_list.append(africa_total.index[africa_total['country']=='Ivory Coast'])
index_list.append(africa_total.index[africa_total['country']=='The Gambia'])
index_list.append(africa_total.index[africa_total['country']=='Democratic Republic of the Congo'])
index_list.append(africa_total.index[africa_total['country']=='Republic of Congo'])
index_list.append(africa_total.index[africa_total['country']=='São Tomé and Príncipe'])	
index_list

[Int64Index([11], dtype='int64'),
 Int64Index([19], dtype='int64'),
 Int64Index([12], dtype='int64'),
 Int64Index([37], dtype='int64'),
 Int64Index([39], dtype='int64')]

In [16]:
# Replace name in Africa List with name in country code list
for i, j in zip(missing_list_names, index_list):
    africa_total.at[j,'country'] = i

africa_total

Unnamed: 0.1,Unnamed: 0,country,has_code
0,0,Algeria,True
1,1,Angola,True
2,2,Benin,True
3,3,Botswana,True
4,4,Burkina Faso,True
5,5,Burundi,True
6,6,Cabo Verde,True
7,7,Cameroon,True
8,8,Central African Republic,True
9,9,Chad,True


In [17]:
# Do the merging again
country_codes_africa = country_codes.merge(africa_total, left_on='location_name_short_en', right_on='country') 
country_codes_africa

Unnamed: 0.1,location_id,location_code,location_name_short_en,level,parent_id,Unnamed: 0,country,has_code
0,2,AGO,Angola,country,352.0,1,Angola,True
1,17,BDI,Burundi,country,352.0,5,Burundi,True
2,19,BEN,Benin,country,352.0,2,Benin,True
3,21,BFA,Burkina Faso,country,352.0,4,Burkina Faso,True
4,37,BWA,Botswana,country,352.0,3,Botswana,True
5,38,CAF,Central African Republic,country,352.0,8,Central African Republic,True
6,44,CIV,Côte d'Ivoire,country,352.0,11,Côte d'Ivoire,False
7,45,CMR,Cameroon,country,352.0,7,Cameroon,True
8,46,COD,Congo (Democratic Republic of the),country,352.0,12,Congo (Democratic Republic of the),False
9,47,COG,Congo,country,352.0,37,Congo,False


In [18]:
# Drop unnecessary columns
country_codes_africa = country_codes_africa.drop(['level','parent_id','Unnamed: 0','country', 'has_code'], axis = 1)
country_codes_africa

Unnamed: 0,location_id,location_code,location_name_short_en
0,2,AGO,Angola
1,17,BDI,Burundi
2,19,BEN,Benin
3,21,BFA,Burkina Faso
4,37,BWA,Botswana
5,38,CAF,Central African Republic
6,44,CIV,Côte d'Ivoire
7,45,CMR,Cameroon
8,46,COD,Congo (Democratic Republic of the)
9,47,COG,Congo


In [19]:
# Store in csv for tableau
country_codes_africa.to_csv('/Users/dorotheakirves/neuefische/capstone_project/data/country_codes_africa.csv')

# OWID Total - Population

In [20]:
# Read in owid total
owid_total = pd.read_csv('/Users/dorotheakirves/neuefische/capstone_project/data/raw-owid-energy-data.csv')

In [21]:
# Keep only Country code, country name, year and population
population = owid_total[["iso_code", "country", "year","population"]]

In [22]:
# Drop rows before 1990
population = population.drop(population[population.year < 1990].index)
population = population.reset_index()
population = population.drop(['index'], axis=1)

In [23]:
# Store population in csv for Tableau
population.to_csv('/Users/dorotheakirves/neuefische/capstone_project/data/population.csv')

In [24]:
country_codes = pd.read_csv('/Users/dorotheakirves/neuefische/capstone_project/data/location.csv')

# OWID Total - Total Primary Energy Consumption

In [25]:
# Read in owid total
owid_total = pd.read_csv('/Users/dorotheakirves/neuefische/capstone_project/data/raw-owid-energy-data.csv')

In [26]:
# Keep only Country code, country name, year and population
total_prim_en_con = owid_total[["iso_code", "country", "year","primary_energy_consumption"]]

In [27]:
# Drop rows before 1990
total_prim_en_con = total_prim_en_con.drop(total_prim_en_con[total_prim_en_con.year < 1990].index)
total_prim_en_con = total_prim_en_con.reset_index()
total_prim_en_con = total_prim_en_con.drop(['index'], axis=1)

In [28]:
# Store population in csv for Tableau
total_prim_en_con.to_csv('/Users/dorotheakirves/neuefische/capstone_project/data/total_primary_energy_consumption.csv')

# OWID Total - Energy consumption

In [29]:
# Keep only Country code, country name, year and and consumption by source from owid_total
prim_en_cons = owid_total[['iso_code', 'country', 'year', 'nuclear_consumption', 'coal_consumption', 'hydro_consumption', 'oil_consumption', 'gas_consumption', 'wind_consumption', 'solar_consumption', 'other_renewable_consumption', 'biofuel_consumption', 'low_carbon_consumption', 'renewables_consumption']]


In [30]:
# Drop rows before 1990
prim_en_cons = prim_en_cons.drop(prim_en_cons[prim_en_cons.year < 1990].index)
prim_en_cons = prim_en_cons.reset_index()
prim_en_cons = prim_en_cons.drop(['index'], axis=1)

In [31]:
# Store energy consumption in csv for Tableau
prim_en_cons.to_csv('/Users/dorotheakirves/neuefische/capstone_project/data/energy-consumption-by-source-and-region.csv')

# Add Data for Africa

In [32]:
# read in only country codes only Africa
country_codes_africa = pd.read_csv('/Users/dorotheakirves/neuefische/capstone_project/data/country_codes_africa.csv')

# Extract country names as list
missing_africa = country_codes_africa['location_name_short_en'].tolist()

In [33]:
# Remove countries with existing data, create list with only countries with missing data
existing_africa = ['Algeria','Egypt','South Africa', 'Morocco']
for i in existing_africa:
    missing_africa.remove(i)
#sort alphabetically
missing_africa.sort()
missing_africa

['Angola',
 'Benin',
 'Botswana',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cameroon',
 'Central African Republic',
 'Chad',
 'Comoros',
 'Congo',
 'Congo (Democratic Republic of the)',
 "Côte d'Ivoire",
 'Djibouti',
 'Eritrea',
 'Eswatini',
 'Ethiopia',
 'Gabon',
 'Gambia',
 'Ghana',
 'Guinea',
 'Guinea-Bissau',
 'Kenya',
 'Lesotho',
 'Liberia',
 'Libya',
 'Madagascar',
 'Malawi',
 'Mali',
 'Mauritania',
 'Mauritius',
 'Mozambique',
 'Namibia',
 'Niger',
 'Nigeria',
 'Rwanda',
 'Sao Tome and Principe',
 'Senegal',
 'Seychelles',
 'Sierra Leone',
 'Somalia',
 'South Sudan',
 'Sudan',
 'Tanzania',
 'Togo',
 'Tunisia',
 'Uganda',
 'Zambia',
 'Zimbabwe']

In [34]:
'South Africa' in missing_africa

False

In [35]:
len(missing_africa)

49

In [36]:
# read in data for every missing country, adding contry name column
path = r'/Users/dorotheakirves/neuefische/Capstone/africa' # use your path
all_files = glob.glob(path + "/*.csv")
all_files.sort()

li = []

for filename, i in zip(all_files, missing_africa):
    df = pd.read_csv(filename, index_col=False, header=0, sep=';')
    df['country'] = i
    li.append(df)

africa_frame = pd.concat(li, axis=0, ignore_index=True)

In [37]:
# Check if it worked until the end
africa_frame.loc[(africa_frame["country"] == "Zimbabwe")]
#africa_frame.tail()

Unnamed: 0.1,Unnamed: 0,Oil,Coal,Gas,Hydroelectricity,Nuclear,Biomass and Waste,Wind,Fuel Ethanol,"Solar, Tide, Wave, Fuel Cell",Geothermal,Biodiesel,country
1720,1980-01-01 00:00:00,76266051,19449868951,0,11331428825,0,0,0,20447988115,0,0,,Zimbabwe
1721,1981-01-01 00:00:00,87999292836,17921975375,0,11634285655,0,0,0,20447988115,0,0,,Zimbabwe
1722,1982-01-01 00:00:00,70399434734,18573684522,0,101942856877,0,0,0,20447988115,0,0,,Zimbabwe
1723,1983-01-01 00:00:00,90932604458,18211624014,0,10557142967,0,0,0,20447988115,0,0,,Zimbabwe
1724,1984-01-01 00:00:00,87999292836,18682301744,0,97771430574,0,0,0,20447988115,0,0,,Zimbabwe
1725,1985-01-01 00:00:00,93865910265,21911878312,0,87571427462,0,0,0,20447988115,0,0,,Zimbabwe
1726,1986-01-01 00:00:00,97155583695,25880056409,0,89200001948,0,0,0,20447988115,0,0,,Zimbabwe
1727,1987-01-01 00:00:00,105850326374,35032938515,0,70857141525,0,0,0,20447988115,0,0,,Zimbabwe
1728,1988-01-01 00:00:00,105687666868,33700556869,0,75971427047,0,0,0,20447988115,0,0,,Zimbabwe
1729,1989-01-01 00:00:00,116063514417,34482605752,0,96257145261,0,0,0,20447988115,0,0,,Zimbabwe


In [38]:
# Deal with year
africa_frame['year']  = africa_frame["Unnamed: 0"].str.slice(stop=4)

In [39]:
africa_frame.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1757 entries, 0 to 1756
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   Unnamed: 0                    1757 non-null   object
 1   Oil                           1757 non-null   object
 2   Coal                          1757 non-null   object
 3   Gas                           1757 non-null   object
 4   Hydroelectricity              1757 non-null   object
 5   Nuclear                       1757 non-null   int64 
 6   Biomass and Waste             1757 non-null   object
 7   Wind                          1757 non-null   object
 8   Fuel Ethanol                  1741 non-null   object
 9   Solar, Tide, Wave, Fuel Cell  1757 non-null   object
 10  Geothermal                    1757 non-null   object
 11  Biodiesel                     816 non-null    object
 12  country                       1757 non-null   object
 13  year              

In [40]:
# Convert datatypes
columns = africa_frame.columns.tolist()
columns.remove('country')
columns.remove('Unnamed: 0')
columns.remove('Nuclear')

for i in columns:
    africa_frame[i] = africa_frame[i].str.replace(',','.')

for i in columns:
    africa_frame[i] = pd.to_numeric(africa_frame[i], errors='coerce')

africa_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1757 entries, 0 to 1756
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Unnamed: 0                    1757 non-null   object 
 1   Oil                           1757 non-null   float64
 2   Coal                          767 non-null    float64
 3   Gas                           444 non-null    float64
 4   Hydroelectricity              1396 non-null   float64
 5   Nuclear                       1757 non-null   int64  
 6   Biomass and Waste             629 non-null    float64
 7   Wind                          494 non-null    float64
 8   Fuel Ethanol                  148 non-null    float64
 9   Solar, Tide, Wave, Fuel Cell  1397 non-null   float64
 10  Geothermal                    74 non-null     float64
 11  Biodiesel                     34 non-null     float64
 12  country                       1757 non-null   object 
 13  yea

In [41]:
africa_frame.describe()

Unnamed: 0,Oil,Coal,Gas,Hydroelectricity,Nuclear,Biomass and Waste,Wind,Fuel Ethanol,"Solar, Tide, Wave, Fuel Cell",Geothermal,Biodiesel,year
count,1757.0,767.0,444.0,1396.0,1757.0,629.0,494.0,148.0,1397.0,74.0,34.0,1757.0
mean,14.841287,2.039173,15.906669,4.561372,0.0,0.159294,0.037006,0.035738,0.003974,1.299946,0.007035,1998.214001
std,29.875289,5.860572,30.140261,7.721843,0.0,0.376574,0.20539,0.067838,0.013935,2.594127,0.01299,10.663884
min,0.100933,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1980.0
25%,2.212212,0.0,0.0,0.194286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1989.0
50%,4.923534,0.267207,1.51842,0.947143,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,1998.0
75%,13.3054,1.040849,16.590947,4.945714,0.0,0.058333,0.0,0.060938,0.0,1.111429,0.009966,2007.0
max,289.234448,36.024982,204.997521,48.671428,0.0,1.786111,2.342857,0.279107,0.18,12.8,0.044849,2016.0


In [42]:
africa_frame = africa_frame.rename(columns={'Nuclear': 'nuclear_consumption','Coal': 'coal_consumption', 'Hydroelectricity': 'hydro_consumption','Oil': 'oil_consumption', 'Gas': 'gas_consumption', 'Wind': 'wind_consumption', 'location_code':'iso_code'})

In [44]:
africa_frame = africa_frame.fillna(0)

In [45]:
africa_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1757 entries, 0 to 1756
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Unnamed: 0                    1757 non-null   object 
 1   oil_consumption               1757 non-null   float64
 2   coal_consumption              1757 non-null   float64
 3   gas_consumption               1757 non-null   float64
 4   hydro_consumption             1757 non-null   float64
 5   nuclear_consumption           1757 non-null   int64  
 6   Biomass and Waste             1757 non-null   float64
 7   wind_consumption              1757 non-null   float64
 8   Fuel Ethanol                  1757 non-null   float64
 9   Solar, Tide, Wave, Fuel Cell  1757 non-null   float64
 10  Geothermal                    1757 non-null   float64
 11  Biodiesel                     1757 non-null   float64
 12  country                       1757 non-null   object 
 13  yea

In [47]:
# Create/calculate missing columns for concating later
africa_frame['biofuel_consumption'] = africa_frame['Fuel Ethanol'] + africa_frame['Biodiesel'] + africa_frame['Biomass and Waste']
africa_frame['solar_consumption'] = np.nan
africa_frame['other_renewable_consumption'] = africa_frame['Solar, Tide, Wave, Fuel Cell']+ africa_frame['Geothermal']
africa_frame['renewables_consumption'] = africa_frame['other_renewable_consumption'] + africa_frame['hydro_consumption'] + africa_frame['wind_consumption']+ africa_frame['biofuel_consumption']
africa_frame['low_carbon_consumption'] = africa_frame['renewables_consumption'] + africa_frame['nuclear_consumption']

In [50]:
africa_frame.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1757 entries, 0 to 1756
Data columns (total 23 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Unnamed: 0_x                  1757 non-null   object 
 1   oil_consumption               1757 non-null   float64
 2   coal_consumption              1757 non-null   float64
 3   gas_consumption               1757 non-null   float64
 4   hydro_consumption             1757 non-null   float64
 5   nuclear_consumption           1757 non-null   int64  
 6   Biomass and Waste             1757 non-null   float64
 7   wind_consumption              1757 non-null   float64
 8   Fuel Ethanol                  1757 non-null   float64
 9   Solar, Tide, Wave, Fuel Cell  1757 non-null   float64
 10  Geothermal                    1757 non-null   float64
 11  Biodiesel                     1757 non-null   float64
 12  country                       1757 non-null   object 
 13  yea

In [49]:
# Add ISO codes
africa_frame = africa_frame.merge(country_codes_africa, left_on='country', right_on='location_name_short_en')
africa_frame = africa_frame.rename(columns={'location_code':'iso_code'})

In [52]:
# Take only columns which match with energy df
africa_frame = africa_frame[prim_en_cons.columns]

In [53]:
africa_frame.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1757 entries, 0 to 1756
Data columns (total 14 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   iso_code                     1757 non-null   object 
 1   country                      1757 non-null   object 
 2   year                         1757 non-null   int64  
 3   nuclear_consumption          1757 non-null   int64  
 4   coal_consumption             1757 non-null   float64
 5   hydro_consumption            1757 non-null   float64
 6   oil_consumption              1757 non-null   float64
 7   gas_consumption              1757 non-null   float64
 8   wind_consumption             1757 non-null   float64
 9   solar_consumption            0 non-null      float64
 10  other_renewable_consumption  1757 non-null   float64
 11  biofuel_consumption          1757 non-null   float64
 12  low_carbon_consumption       1757 non-null   float64
 13  renewables_consump

In [54]:
# Drop rows before 1990
africa_frame = africa_frame.drop(africa_frame[africa_frame.year < 1990].index)
africa_frame = africa_frame.reset_index()
africa_frame = africa_frame.drop(['index'], axis=1)

In [55]:
africa_frame.head()

Unnamed: 0,iso_code,country,year,nuclear_consumption,coal_consumption,hydro_consumption,oil_consumption,gas_consumption,wind_consumption,solar_consumption,other_renewable_consumption,biofuel_consumption,low_carbon_consumption,renewables_consumption
0,AGO,Angola,1990,0,0.0,2.051429,15.325936,5.834961,0.0,,0.0,0.0,2.051429,2.051429
1,AGO,Angola,1991,0,0.0,2.042857,16.144753,6.142351,0.0,,0.0,0.0,2.042857,2.042857
2,AGO,Angola,1992,0,0.0,2.377143,16.129702,6.182138,0.0,,0.0,0.0,2.377143,2.377143
3,AGO,Angola,1993,0,0.0,2.517143,15.644819,6.073679,0.0,,0.0,0.0,2.517143,2.517143
4,AGO,Angola,1994,0,0.0,2.531429,15.46997,5.638678,0.0,,0.0,0.0,2.531429,2.531429


In [56]:
missing_africa

['Angola',
 'Benin',
 'Botswana',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cameroon',
 'Central African Republic',
 'Chad',
 'Comoros',
 'Congo',
 'Congo (Democratic Republic of the)',
 "Côte d'Ivoire",
 'Djibouti',
 'Eritrea',
 'Eswatini',
 'Ethiopia',
 'Gabon',
 'Gambia',
 'Ghana',
 'Guinea',
 'Guinea-Bissau',
 'Kenya',
 'Lesotho',
 'Liberia',
 'Libya',
 'Madagascar',
 'Malawi',
 'Mali',
 'Mauritania',
 'Mauritius',
 'Mozambique',
 'Namibia',
 'Niger',
 'Nigeria',
 'Rwanda',
 'Sao Tome and Principe',
 'Senegal',
 'Seychelles',
 'Sierra Leone',
 'Somalia',
 'South Sudan',
 'Sudan',
 'Tanzania',
 'Togo',
 'Tunisia',
 'Uganda',
 'Zambia',
 'Zimbabwe']

In [57]:
#Create Dataframe without Africa
frame_without_africa = prim_en_cons
for i in missing_africa:
    frame_without_africa.drop(frame_without_africa.loc[frame_without_africa['country']==i].index, inplace=True)
frame_without_africa.head()

Unnamed: 0,iso_code,country,year,nuclear_consumption,coal_consumption,hydro_consumption,oil_consumption,gas_consumption,wind_consumption,solar_consumption,other_renewable_consumption,biofuel_consumption,low_carbon_consumption,renewables_consumption
0,AFG,Afghanistan,1990,,,,,,,,,,,
1,AFG,Afghanistan,1991,,,,,,,,,,,
2,AFG,Afghanistan,1992,,,,,,,,,,,
3,AFG,Afghanistan,1993,,,,,,,,,,,
4,AFG,Afghanistan,1994,,,,,,,,,,,


In [58]:
# Add africa frame
prim_en_cons_with_africa = africa_frame.append(frame_without_africa, ignore_index=True) 

In [59]:
prim_en_cons_with_africa.to_csv('/Users/dorotheakirves/neuefische/capstone_project/data/energy-consumption-by-source-and-region.csv')

In [60]:
prim_en_cons_with_africa.loc[(prim_en_cons_with_africa["country"] == "Angola")]

Unnamed: 0,iso_code,country,year,nuclear_consumption,coal_consumption,hydro_consumption,oil_consumption,gas_consumption,wind_consumption,solar_consumption,other_renewable_consumption,biofuel_consumption,low_carbon_consumption,renewables_consumption
0,AGO,Angola,1990,0.0,0.0,2.051429,15.325936,5.834961,0.0,,0.0,0.0,2.051429,2.051429
1,AGO,Angola,1991,0.0,0.0,2.042857,16.144753,6.142351,0.0,,0.0,0.0,2.042857,2.042857
2,AGO,Angola,1992,0.0,0.0,2.377143,16.129702,6.182138,0.0,,0.0,0.0,2.377143,2.377143
3,AGO,Angola,1993,0.0,0.0,2.517143,15.644819,6.073679,0.0,,0.0,0.0,2.517143,2.517143
4,AGO,Angola,1994,0.0,0.0,2.531429,15.46997,5.638678,0.0,,0.0,0.0,2.531429,2.531429
5,AGO,Angola,1995,0.0,0.0,2.545714,16.749385,6.073679,0.0,,0.0,0.0,2.545714,2.545714
6,AGO,Angola,1996,0.0,0.0,2.617143,15.673962,6.073679,0.0,,0.0,0.0,2.617143,2.617143
7,AGO,Angola,1997,0.0,0.0,2.474286,18.772116,6.182138,0.0,,0.0,0.0,2.474286,2.474286
8,AGO,Angola,1998,0.0,0.0,2.997143,15.366332,6.290596,0.0,,0.0,0.0,2.997143,2.997143
9,AGO,Angola,1999,0.0,0.0,2.528572,18.947092,6.073679,0.0,,0.0,0.0,2.528572,2.528572
