# Analysis of Kenya Energy Statistics

## Importing Libraries to be used 

In [10]:
import pandas as pd
import numpy as np

## Loading and reading the dataset

In [11]:
data = '/home/kuria/Documents/african-regional-energy-statistics-2014-xlsx-1.xls'
df1 = pd.read_excel(data)
df1.head()

Unnamed: 0,Indicator,IndicatorName,Region,RegionName,Unit,Date,Value
0,102,"Final Consumption of coking coal, 1000 tonnes",10203,Ethiopia,thousand tonnes,2006,11.0
1,102,"Final Consumption of coking coal, 1000 tonnes",10203,Ethiopia,thousand tonnes,2007,6.0
2,102,"Final Consumption of coking coal, 1000 tonnes",10203,Ethiopia,thousand tonnes,2008,15.0
3,102,"Final Consumption of coking coal, 1000 tonnes",10203,Ethiopia,thousand tonnes,2009,25.0
4,102,"Final Consumption of coking coal, 1000 tonnes",10203,Ethiopia,thousand tonnes,2010,50.0


## Previewing and Inspecting the dataset

In [12]:
df1['IndicatorName'].unique()

array(['Final Consumption of coking coal, 1000 tonnes',
       'Consumption of coking coal in industry , 1000 tonnes',
       'Net imports of coking coal, 1000 tonnes',
       'Production of charcoal, 1000 tonnes',
       'Final consumption of Charcoal ( 1000 tonnes )',
       'Production of Coal (1000 tonnes)',
       'Production of crude oil, NLG and additives, 1000 tonnes',
       'Refinery output of oil products, 1000 tonnes',
       'Final consumption of oil, 1000 tonnes',
       'Consumption of oil in industry, 1000 tonnes',
       'Consommation de pétrole dans les transports, 1000 tonnes',
       'Net imports of crude oil, NGL, refinery feedstocks and additives, 1000 tonnes',
       'Net imports of oil products, 1000 tonnes',
       'Production of natural gas, Tj',
       'Final consumption of natural gas, Tj',
       'Consumption of natural gas in industry, Tj',
       'Net imports of natural gas, Tj',
       'Production of electricity from biofuels and waste, GWh',
       'Pro

In [13]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17018 entries, 0 to 17017
Data columns (total 7 columns):
Indicator        17018 non-null int64
IndicatorName    17018 non-null object
Region           17018 non-null int64
RegionName       17018 non-null object
Unit             17018 non-null object
Date             17018 non-null int64
Value            17018 non-null float64
dtypes: float64(1), int64(3), object(3)
memory usage: 930.8+ KB


In [17]:
df1.shape

(17018, 7)

In [23]:
df['RegionName'].unique()

array(['Ethiopia', 'Kenya', 'Tanzania', 'Algeria', 'Egypt', 'Morocco',
       'Tunisia', 'Botswana', 'Madagascar', 'Malawi', 'Mauritius',
       'Mozambique', 'Namibia', 'South Africa', 'Swaziland', 'Zambia',
       'Zimbabwe', 'Niger', 'Nigeria', 'Senegal', 'Africa', 'Togo',
       'Angola', 'Cameroon', 'Central African Republic', 'Chad',
       'Congo, Rep.', 'Congo, Dem. Rep.', 'Equatorial Guinea', 'Rwanda',
       'Djibouti', 'Eritrea', 'Somalia', 'Uganda', 'Libya', 'Lesotho',
       'Benin', 'Burkina Faso', 'Cape Verde', "Cote d'Ivoire", 'Ghana',
       'Guinea', 'Guinea-Bissau', 'Liberia', 'Mali', 'Sudan', 'Gabon',
       'South Sudan', 'Mauritania', 'Burundi', 'Sao Tome and Principe',
       'Comoros', 'Seychelles', 'Gambia', 'Sierra Leone', 'East Africa',
       'West Africa', 'North Africa', 'Southern Africa', 'Central Africa',
       'Saharawi Republic'], dtype=object)

In [20]:
df1.tail()

Unnamed: 0,Indicator,IndicatorName,Region,RegionName,Unit,Date,Value
17013,21773673,Production of Firewood (1000 tonnes),1,Africa,thousand tonnes,2010,666808.970223
17014,21773673,Production of Firewood (1000 tonnes),1,Africa,thousand tonnes,2011,742603.228616
17015,21773673,Production of Firewood (1000 tonnes),1,Africa,thousand tonnes,2012,803040.046773
17016,21773673,Production of Firewood (1000 tonnes),1,Africa,thousand tonnes,2013,953799.009468
17017,21773673,Production of Firewood (1000 tonnes),1,Africa,thousand tonnes,2014,975947.401262


In [22]:
#making a copy of the original dataset that we will use in the analysis.
df = df1.copy()
df.head()

Unnamed: 0,Indicator,IndicatorName,Region,RegionName,Unit,Date,Value
0,102,"Final Consumption of coking coal, 1000 tonnes",10203,Ethiopia,thousand tonnes,2006,11.0
1,102,"Final Consumption of coking coal, 1000 tonnes",10203,Ethiopia,thousand tonnes,2007,6.0
2,102,"Final Consumption of coking coal, 1000 tonnes",10203,Ethiopia,thousand tonnes,2008,15.0
3,102,"Final Consumption of coking coal, 1000 tonnes",10203,Ethiopia,thousand tonnes,2009,25.0
4,102,"Final Consumption of coking coal, 1000 tonnes",10203,Ethiopia,thousand tonnes,2010,50.0


## Data cleaning

### Completeness

In [24]:
#Counting missing values using pandas

df.isnull().sum()

Indicator        0
IndicatorName    0
Region           0
RegionName       0
Unit             0
Date             0
Value            0
dtype: int64

In [25]:
#Counting missing values using numpy
np.count_nonzero(df.isnull())

0

### Validity

In [26]:
#dropping the Region column as we will not be using it.
df.drop(columns = ['Region'], inplace=True)
df.head()

Unnamed: 0,Indicator,IndicatorName,RegionName,Unit,Date,Value
0,102,"Final Consumption of coking coal, 1000 tonnes",Ethiopia,thousand tonnes,2006,11.0
1,102,"Final Consumption of coking coal, 1000 tonnes",Ethiopia,thousand tonnes,2007,6.0
2,102,"Final Consumption of coking coal, 1000 tonnes",Ethiopia,thousand tonnes,2008,15.0
3,102,"Final Consumption of coking coal, 1000 tonnes",Ethiopia,thousand tonnes,2009,25.0
4,102,"Final Consumption of coking coal, 1000 tonnes",Ethiopia,thousand tonnes,2010,50.0


### Accuracy

### Consistency

In [27]:
# Checking for duplicates
df.duplicated().sum()

0

In [None]:
## No duplicates in the dataset

### Uniformity


In [28]:
# converting column names to lowercase
df.columns = df.columns.str.lower()
df.head()

Unnamed: 0,indicator,indicatorname,regionname,unit,date,value
0,102,"Final Consumption of coking coal, 1000 tonnes",Ethiopia,thousand tonnes,2006,11.0
1,102,"Final Consumption of coking coal, 1000 tonnes",Ethiopia,thousand tonnes,2007,6.0
2,102,"Final Consumption of coking coal, 1000 tonnes",Ethiopia,thousand tonnes,2008,15.0
3,102,"Final Consumption of coking coal, 1000 tonnes",Ethiopia,thousand tonnes,2009,25.0
4,102,"Final Consumption of coking coal, 1000 tonnes",Ethiopia,thousand tonnes,2010,50.0


## Exporting the cleaned dataset to csv

## Data Analysis 

In [29]:
#Selecting Kenya and East Africa rows only
df2 = df[(df['regionname'] == 'Kenya') | (df['regionname'] == 'East Africa')]
df2.head()

Unnamed: 0,indicator,indicatorname,regionname,unit,date,value
6,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2000,66.0
7,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2001,66.0
8,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2002,99.0
9,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2003,92.0
10,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2004,108.0


In [52]:
df2.shape

(776, 6)

In [53]:
df2

Unnamed: 0,indicator,indicatorname,regionname,unit,date,value
6,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2000,66.000000
7,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2001,66.000000
8,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2002,99.000000
9,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2003,92.000000
10,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2004,108.000000
11,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2005,108.000000
12,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2007,110.000000
13,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2008,109.000000
14,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2009,154.000000
15,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2010,268.000000


In [60]:
#resetting the index column
df2.reset_index(drop=True, inplace=True)
df2.head()

Unnamed: 0,indicator,indicatorname,regionname,unit,date,value
0,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2000,66.0
1,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2001,66.0
2,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2002,99.0
3,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2003,92.0
4,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2004,108.0


## df2

In [63]:
df2['date'].unique()

array([2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011,
       2006, 2012, 2013, 2014])

In [62]:
#
import pandas_profiling
pandas_profiling.ProfileReport(df2)

0,1
Number of variables,6
Number of observations,776
Total Missing (%),0.0%
Total size in memory,36.5 KiB
Average record size in memory,48.1 B

0,1
Numeric,3
Categorical,3
Boolean,0
Date,0
Text (Unique),0
Rejected,0
Unsupported,0

0,1
Distinct count,15
Unique (%),1.9%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,2007.1
Minimum,2000
Maximum,2014
Zeros (%),0.0%

0,1
Minimum,2000
5-th percentile,2000
Q1,2003
Median,2007
Q3,2011
95-th percentile,2014
Maximum,2014
Range,14
Interquartile range,8

0,1
Standard deviation,4.2908
Coef of variation,0.0021378
Kurtosis,-1.1882
Mean,2007.1
MAD,3.7028
Skewness,-0.035464
Sum,1557531
Variance,18.411
Memory size,6.1 KiB

Value,Count,Frequency (%),Unnamed: 3
2014,53,6.8%,
2013,53,6.8%,
2012,53,6.8%,
2011,53,6.8%,
2010,53,6.8%,
2009,53,6.8%,
2008,53,6.8%,
2007,53,6.8%,
2005,53,6.8%,
2006,52,6.7%,

Value,Count,Frequency (%),Unnamed: 3
2000,49,6.3%,
2001,48,6.2%,
2002,48,6.2%,
2003,50,6.4%,
2004,52,6.7%,

Value,Count,Frequency (%),Unnamed: 3
2010,53,6.8%,
2011,53,6.8%,
2012,53,6.8%,
2013,53,6.8%,
2014,53,6.8%,

0,1
Distinct count,32
Unique (%),4.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,4629300
Minimum,102
Maximum,21773673
Zeros (%),0.0%

0,1
Minimum,102.0
5-th percentile,103.0
Q1,303.75
Median,502.0
Q3,511.0
95-th percentile,21773000.0
Maximum,21773673.0
Range,21773571.0
Interquartile range,207.25

0,1
Standard deviation,8913300
Coef of variation,1.9254
Kurtosis,-0.019321
Mean,4629300
MAD,7289400
Skewness,1.4074
Sum,3592320783
Variance,79447000000000
Memory size,6.1 KiB

Value,Count,Frequency (%),Unnamed: 3
21772773,30,3.9%,
501,30,3.9%,
302,30,3.9%,
303,30,3.9%,
304,30,3.9%,
305,30,3.9%,
306,30,3.9%,
307,30,3.9%,
201,30,3.9%,
104,30,3.9%,

Value,Count,Frequency (%),Unnamed: 3
102,29,3.7%,
103,30,3.9%,
104,30,3.9%,
201,30,3.9%,
301,15,1.9%,

Value,Count,Frequency (%),Unnamed: 3
21773273,15,1.9%,
21773373,15,1.9%,
21773473,15,1.9%,
21773573,15,1.9%,
21773673,15,1.9%,

0,1
Distinct count,32
Unique (%),4.1%
Missing (%),0.0%
Missing (n),0

0,1
"Consumption of electricity in industry, GWh",30
"Consommation de pétrole dans les transports, 1000 tonnes",30
Production of electricity from renewable sources (GWh),30
Other values (29),686

Value,Count,Frequency (%),Unnamed: 3
"Consumption of electricity in industry, GWh",30,3.9%,
"Consommation de pétrole dans les transports, 1000 tonnes",30,3.9%,
Production of electricity from renewable sources (GWh),30,3.9%,
"Production of geothermal electricity, GWh",30,3.9%,
"Refinery output of oil products, 1000 tonnes",30,3.9%,
"Production of hydro electricity, GWh",30,3.9%,
"Consumption of oil in industry, 1000 tonnes",30,3.9%,
"Production of electricity from biofuels and waste, GWh",30,3.9%,
"Production of electricity from fossil fuels, GWh",30,3.9%,
"Final consumption of oil, 1000 tonnes",30,3.9%,

0,1
Distinct count,2
Unique (%),0.3%
Missing (%),0.0%
Missing (n),0

0,1
East Africa,467
Kenya,309

Value,Count,Frequency (%),Unnamed: 3
East Africa,467,60.2%,
Kenya,309,39.8%,

0,1
Distinct count,3
Unique (%),0.4%
Missing (%),0.0%
Missing (n),0

0,1
thousand tonnes,404
GWh,338
terajoules (TJ),34

Value,Count,Frequency (%),Unnamed: 3
thousand tonnes,404,52.1%,
GWh,338,43.6%,
terajoules (TJ),34,4.4%,

0,1
Distinct count,643
Unique (%),82.9%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,6787.2
Minimum,-25680
Maximum,139290
Zeros (%),0.0%

0,1
Minimum,-25680.0
5-th percentile,11.0
Q1,349.27
Median,2373.7
Q3,6175.7
95-th percentile,23228.0
Maximum,139290.0
Range,164970.0
Interquartile range,5826.4

0,1
Standard deviation,17320
Coef of variation,2.5518
Kurtosis,34.735
Mean,6787.2
MAD,7774.4
Skewness,5.609
Sum,5266800
Variance,299970000
Memory size,6.1 KiB

Value,Count,Frequency (%),Unnamed: 3
108.0,9,1.2%,
5323.0,7,0.9%,
1.0,7,0.9%,
321.0,7,0.9%,
66.0,6,0.8%,
92.0,4,0.5%,
11.0,4,0.5%,
3.0,3,0.4%,
730.0,3,0.4%,
4975.0,3,0.4%,

Value,Count,Frequency (%),Unnamed: 3
-25680.0,1,0.1%,
-7453.0,1,0.1%,
-6151.7,1,0.1%,
-5763.8,1,0.1%,
-5752.8,1,0.1%,

Value,Count,Frequency (%),Unnamed: 3
127928.064285714,1,0.1%,
133583.548714286,1,0.1%,
135785.551,1,0.1%,
137608.768016033,1,0.1%,
139288.348282333,1,0.1%,

Unnamed: 0,indicator,indicatorname,regionname,unit,date,value
0,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2000,66.0
1,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2001,66.0
2,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2002,99.0
3,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2003,92.0
4,102,"Final Consumption of coking coal, 1000 tonnes",Kenya,thousand tonnes,2004,108.0


## Comparative Analysis Electricity between Kenya and E.A between 2000 and 2014

In [64]:
df2['indicatorname'].unique()

array(['Final Consumption of coking coal, 1000 tonnes',
       'Consumption of coking coal in industry , 1000 tonnes',
       'Net imports of coking coal, 1000 tonnes',
       'Production of charcoal, 1000 tonnes',
       'Final consumption of Charcoal ( 1000 tonnes )',
       'Refinery output of oil products, 1000 tonnes',
       'Final consumption of oil, 1000 tonnes',
       'Consumption of oil in industry, 1000 tonnes',
       'Consommation de pétrole dans les transports, 1000 tonnes',
       'Net imports of crude oil, NGL, refinery feedstocks and additives, 1000 tonnes',
       'Net imports of oil products, 1000 tonnes',
       'Production of electricity from biofuels and waste, GWh',
       'Production of electricity from fossil fuels, GWh',
       'Production of hydro electricity, GWh',
       'Production of geothermal electricity, GWh',
       'Production of electricity from solar, wind, tide, wave and other sources, GWh',
       'Total production of electricity, GWh',
       '

## Analysis

### Year of highest production of electricity from renewable sources

In [77]:
#Determine the year in which the production of electricity from renewable sources was the highest in Kenya between 2000 and 2014?

kenre = df2[(df2['regionname'] == 'Kenya') & (df2['indicatorname'] == 'Production of electricity from renewable sources (GWh)')]

kenre.groupby('date')[['value']].max().sort_values('value', ascending=False).head()

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2014,5941.88003
2013,5583.700028
2011,5306.0
2012,5265.36882
2010,5180.0


In [None]:
## 2014 had the highest production capoacity of renewable sources with 5941.880030 GWh

### Most used source of electricity in 2014

In [None]:
#Which source of electricity was used the most in 2014?

In [79]:
# Geothermal production in 2014
kenge = df2[(df2['regionname'] == 'Kenya') & (df2['indicatorname'] == 'Production of geothermal electricity, GWh')]

kenge.groupby('date')[['value']].max().sort_values('value', ascending=False).head()

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2014,3046.0
2013,2820.075264
2012,2611.1808
2011,1518.0
2010,1453.0


In [None]:
## 

In [83]:
# production from fossil fuek
kenfos = df2[(df2['regionname'] == 'Kenya') & (df2['indicatorname'] == 'Production of electricity from fossil fuels, GWh')]

kenfos.groupby('date')[['value']].max().sort_values('value', ascending=False).head()

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2014,3541.0
2013,3405.24887
2012,3274.27776
2009,3029.0
2011,2569.0


In [88]:
kendro = df2[(df2['regionname'] == 'Kenya') & (df2['indicatorname'] == 'Production of hydro electricity, GWh')]

kendro.groupby('date')[['value']].max().sort_values('date', ascending=False).head()

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2014,2598.0
2013,2498.224735
2012,2402.139168
2011,3451.0
2010,3393.0


In [90]:
kenbio = df2[(df2['regionname'] == 'Kenya') & (df2['indicatorname'] == 'Production of electricity from biofuels and waste, GWh')]

kenbio.groupby('date')[['value']].max().sort_values('date', ascending=False).head()

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2014,231.0
2013,223.88003
2012,217.359252
2011,316.0
2010,316.0


In [91]:
kensol = df2[(df2['regionname'] == 'Kenya') & (df2['indicatorname'] == 'Production of electricity from solar, wind, tide, wave and other sources, GWh')]

kensol.groupby('date')[['value']].max().sort_values('value', ascending=False).head()

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2014,74.0
2013,41.52
2012,34.6896
2011,21.0
2010,18.0


In [None]:
#Which source of electricity was used the most in 2014

## Fossil fuels was the most used source of electricity in 2014
 

### Percantage between the production of electricty from fossil fuels and renewable sources 

In [125]:
# Find the percantage between production of electricity from fossil fuels and production of electricity from renewable sources in year 2013?
fos10 = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2010) & (df2['indicatorname'] == 'Production of electricity from fossil fuels, GWh')]
fos10.value.values


ren10 = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2010) & (df2['indicatorname'] == 'Production of electricity from renewable sources (GWh)')]
ren10.value.values

fos10.value.values / ren10.value.values
total10 = fos10.value.values + ren10.value.values
#percentage of fossil fuels
print((fos10.value.values / total10) * 100)
#percentage of green energy
print((ren10.value.values / total10) * 100)


[30.62809696]
[69.37190304]


In [127]:
# Find the percantage between production of electricity from fossil fuels and production of electricity from renewable sources in year 2013?
fos11 = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2011) & (df2['indicatorname'] == 'Production of electricity from fossil fuels, GWh')]
fos11.value.values


ren11 = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2011) & (df2['indicatorname'] == 'Production of electricity from renewable sources (GWh)')]
ren11.value.values

fos11.value.values / ren11.value.values
total11 = fos11.value.values + ren11.value.values
#percentage of fossil fuels
print((fos11.value.values / total) * 100)
#percentage of green energy
print((ren11.value.values / total) * 100)


[28.57953726]
[59.02803609]


In [129]:
# Find the percantage between production of electricity from fossil fuels and production of electricity from renewable sources in year 2013?
fos12 = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2012) & (df2['indicatorname'] == 'Production of electricity from fossil fuels, GWh')]
fos12.value.values


ren12 = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2012) & (df2['indicatorname'] == 'Production of electricity from renewable sources (GWh)')]
ren12.value.values

fos12.value.values / ren12.value.values
total12 = fos12.value.values + ren12.value.values
#percentage of fossil fuels
print((fos12.value.values / total) * 100)
#percentage of green energy
print((ren12.value.values / total) * 100)


[36.42559099]
[58.57602351]


In [131]:
# Find the percantage between production of electricity from fossil fuels and production of electricity from renewable sources in year 2013?
fos13 = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2013) & (df2['indicatorname'] == 'Production of electricity from fossil fuels, GWh')]
fos13.value.values


ren13 = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2013) & (df2['indicatorname'] == 'Production of electricity from renewable sources (GWh)')]
ren13.value.values

fos13.value.values / ren13.value.values
total13 = fos13.value.values + ren13.value.values
#percentage of fossil fuels
print((fos13.value.values / total13) * 100)
#percentage of green energy
print((ren13.value.values / total13) * 100)


[37.88261463]
[62.11738537]


In [132]:
# Find the percantage between production of electricity from fossil fuels and production of electricity from renewable sources in year 2013?
fos14 = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2014) & (df2['indicatorname'] == 'Production of electricity from fossil fuels, GWh')]
fos14.value.values


ren14 = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2014) & (df2['indicatorname'] == 'Production of electricity from renewable sources (GWh)')]
ren14.value.values

fos14.value.values / ren14.value.values
total14 = fos14.value.values + ren14.value.values
#percentage of fossil fuels
print((fos14.value.values / total14) * 100)
#percentage of green energy
print((ren14.value.values / total14) * 100)


[37.34097646]
[62.65902354]


In [124]:
#what percentage of electricity produced from biofuels and waste in East Africa was from Kenya in year 2014?

bio = df2[(df2['regionname'] == 'Kenya') & (df2['date'] == 2014) & (df2['indicatorname'] == 'Production of electricity from biofuels and waste, GWh')]
x=bio.value.values

ea = df2[(df2['regionname'] == 'East Africa') & (df2['date'] == 2014) & (df2['indicatorname'] == 'Production of electricity from biofuels and waste, GWh')]
y=ea.value.values
print((x/y)*100)

[23.78990731]


In [148]:
import seaborn as sns 

yrlst = [2010, 2011, 2013, 2014]

df4 = df2[(df2['indicatorname'] == 'Production of electricity from renewable sources (GWh)') | (df2['indicatorname'] == 'Production of electricity from fossil fuels, GWh')]

dat = df4[(df4['date'].isin(yrlst)) & (df4['regionname'] == 'Kenya')]

dat.head()

bptken = sns.boxplot(y='value', x='date', data=dat, width=0.8, palette='colorblind')


bptken = sns.stripplot(y='value', x='date', data=dat, jitter=True, marker='*', alpha=0.7, color="black")

bptken.axes.set_title('Production capacity vs date')



Text(0.5, 1.0, 'Production capacity vs date')

In [145]:
df3


Unnamed: 0,indicator,indicatorname,regionname,unit,date,value
0,21773673,Production of Firewood (1000 tonnes),East Africa,thousand tonnes,2014,139288.348282
1,201,"Production of charcoal, 1000 tonnes",East Africa,thousand tonnes,2014,16539.742597
2,506,"Production of electricity from solar, wind, ti...",Kenya,GWh,2014,74.000000
3,506,"Production of electricity from solar, wind, ti...",East Africa,GWh,2014,624.880000
4,303,"Final consumption of oil, 1000 tonnes",Kenya,thousand tonnes,2014,2558.723940
5,507,"Total production of electricity, GWh",Kenya,GWh,2014,9490.000000
6,302,"Refinery output of oil products, 1000 tonnes",East Africa,thousand tonnes,2014,4743.846168
7,302,"Refinery output of oil products, 1000 tonnes",Kenya,thousand tonnes,2014,956.357066
8,507,"Total production of electricity, GWh",East Africa,GWh,2014,44567.880000
9,508,"Final consumption of electricity, GWh",Kenya,GWh,2014,8579.163823


### 