# Alternative Data Source
This notebook takes a look at a combined energy dataset compiled and updated by [*Our World In Data*](https://ourworldindata.org/energy).  
The dataset is hosted on GitHub, so the raw project data can be fetched directly by a simple 'get_data' script, which aids reproducibility of the final results. 

In [1]:
import pandas as pd

In [2]:
# Read the combined energy CSV directly from the owid/energy-data GitHub repository.
OWID_ENERGY_CSV_URL = "https://github.com/owid/energy-data/raw/master/owid-energy-data.csv"
df = pd.read_csv(OWID_ENERGY_CSV_URL)

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) of the loaded frame.
summary_stats = df.describe()
summary_stats

Unnamed: 0,year,coal_prod_change_pct,coal_prod_change_twh,gas_prod_change_pct,gas_prod_change_twh,oil_prod_change_pct,oil_prod_change_twh,energy_cons_change_pct,energy_cons_change_twh,biofuel_cons_change_pct,...,solar_elec_per_capita,solar_energy_per_capita,gdp,wind_share_elec,wind_cons_change_pct,wind_share_energy,wind_cons_change_twh,wind_consumption,wind_elec_per_capita,wind_energy_per_capita
count,10134.0,3906.0,5572.0,3719.0,3948.0,4224.0,4044.0,9842.0,9892.0,519.0,...,6459.0,4284.0,6826.0,5051.0,1543.0,4284.0,4202.0,4284.0,6459.0,4284.0
mean,1995.584468,17.913868,13.441914,251221600000000.0,24.39833,6.13738,26.982328,inf,36.853247,53.993495,...,9.2917,29.41627,745040000000.0,0.900289,313.478014,0.345406,2.166405,15.102057,38.56799,134.190735
std,13.743111,724.910532,163.892636,1.53204e+16,115.021107,70.251951,231.63894,,267.425811,286.541013,...,44.79382,127.93743,5007744000000.0,3.298482,6836.857935,1.337962,17.023106,126.761579,173.055796,513.702989
min,1965.0,-100.0,-2326.87,-100.0,-1054.32,-100.0,-2239.737,-92.632,-6083.407,-100.0,...,0.0,0.0,196308000.0,0.0,-100.0,0.0,-10.409,0.0,0.0,0.0
25%,1985.0,-0.55475,0.0,0.0,0.0,-1.48825,-2.2695,-0.43075,-0.044,0.476,...,0.0,0.0,15670980000.0,0.0,2.1595,0.0,0.0,0.0,0.0,0.0
50%,1996.0,0.0,0.0,1.433,0.1845,0.0,0.0,2.567,0.7355,10.499,...,0.0,0.0,62752740000.0,0.0,19.749,0.0,0.0,0.0,0.0,0.0
75%,2007.0,2.6,0.0,7.903,13.1355,4.569,16.934,6.8765,10.32825,31.5885,...,0.1275,0.3,244000000000.0,0.09,50.0,0.02,0.01075,0.14125,0.4575,4.8395
max,2019.0,44965.754,3060.593,9.34293e+17,2112.975,3242.637,2790.614,inf,6446.809,5620.336,...,712.245,1763.675,107000000000000.0,52.992,242384.843,20.66,428.736,3540.051,2797.961,6928.363


In [4]:
# Show five randomly chosen rows; the fixed random_state keeps the draw repeatable.
df.sample(n=5, random_state=5)

Unnamed: 0,iso_code,country,year,coal_prod_change_pct,coal_prod_change_twh,gas_prod_change_pct,gas_prod_change_twh,oil_prod_change_pct,oil_prod_change_twh,energy_cons_change_pct,...,solar_elec_per_capita,solar_energy_per_capita,gdp,wind_share_elec,wind_cons_change_pct,wind_share_energy,wind_cons_change_twh,wind_consumption,wind_elec_per_capita,wind_energy_per_capita
1309,BFA,Burkina Faso,2002,,0.0,,,,,3.924,...,0.0,,16146550000.0,0.0,,,,,0.0,
9536,USA,United States,1985,-1.998,-109.711,-5.706,-271.032,0.518,29.824,-0.248,...,0.045,0.124,7880000000000.0,0.0,-11.217,0.0,-0.002,0.016,0.024,0.067
7783,SPM,Saint Pierre and Miquelon,1982,,0.0,,,,,0.0,...,,,,,,,,,,
5841,MAR,Morocco,1999,0.0,,0.0,,0.0,,3.908,...,0.0,0.0,137000000000.0,0.0,,0.0,0.0,0.0,0.0,0.0
1953,COM,Comoros,1983,,0.0,,,,,0.0,...,,,793145000.0,,,,,,,


In [5]:
# Plain Python list of every column name in the frame, in frame order.
col_list = list(df.columns)

In [6]:
# Distinct column-name prefixes (the text before the first "_").
# Sorted so the list comes out in the same order after every kernel restart;
# iterating a plain set of strings gives an order that varies with hash randomisation.
prefix_list = sorted({name.split("_", 1)[0] for name in col_list})

In [7]:
# Prefixes (text before the first "_") that select the general columns.
general_prefix_list = ['gdp', 'population', 'primary', 'year',
                       'energy', 'per', 'country', 'iso', 'electricity']

# Prefixes that select the per-source breakdown columns.
# Kept under its own name: previously the prefix list was overwritten by the
# filtered column names, so re-running this cell filtered against column names
# and silently produced an empty list.
breakdown_source_prefixes = ['nuclear', 'coal', 'solar', 'biofuel',
                             'wind', 'fossil', 'gas', 'hydro', 'other',
                             'renewables', 'oil', 'low']

# Split the frame's column names into the two groups by their leading prefix.
general_col_list = [x for x in col_list if x.split("_")[0] in general_prefix_list]
breakdown_col_list = [x for x in col_list if x.split("_")[0] in breakdown_source_prefixes]

# Backward-compatible alias: a later cell displays the breakdown columns under
# this (historical) name, so it must keep pointing at the column list.
breakdown_prefix_list = breakdown_col_list

In [8]:
# Display the columns whose leading prefix is in general_prefix_list.
general_col_list

['iso_code',
 'country',
 'year',
 'energy_cons_change_pct',
 'energy_cons_change_twh',
 'electricity_generation',
 'energy_per_gdp',
 'energy_per_capita',
 'per_capita_electricity',
 'population',
 'primary_energy_consumption',
 'gdp']

In [9]:
# Display the per-source breakdown columns.
# NOTE: despite its name, this variable holds full column names at this point,
# not prefixes -- it is assigned the filtered column list in the cell that defines it.
breakdown_prefix_list

['coal_prod_change_pct',
 'coal_prod_change_twh',
 'gas_prod_change_pct',
 'gas_prod_change_twh',
 'oil_prod_change_pct',
 'oil_prod_change_twh',
 'biofuel_cons_change_pct',
 'biofuel_share_energy',
 'biofuel_cons_change_twh',
 'biofuel_consumption',
 'biofuel_cons_per_capita',
 'coal_share_elec',
 'coal_cons_change_pct',
 'coal_share_energy',
 'coal_cons_change_twh',
 'coal_consumption',
 'coal_elec_per_capita',
 'coal_cons_per_capita',
 'coal_production',
 'coal_prod_per_capita',
 'coal_electricity',
 'fossil_electricity',
 'gas_electricity',
 'hydro_electricity',
 'nuclear_electricity',
 'oil_electricity',
 'other_renewable_electricity',
 'renewables_electricity',
 'solar_electricity',
 'wind_electricity',
 'fossil_cons_change_pct',
 'fossil_share_energy',
 'fossil_cons_change_twh',
 'fossil_fuel_consumption',
 'fossil_energy_per_capita',
 'fossil_cons_per_capita',
 'fossil_share_elec',
 'gas_share_elec',
 'gas_cons_change_pct',
 'gas_share_energy',
 'gas_cons_change_twh',
 'gas_con