# Alternative Data Source
This notebook takes a look at a combined energy dataset compiled and updated by [*Our World In Data*](https://ourworldindata.org/energy).  
The dataset is hosted on GitHub, so the raw project data can be fetched with a simple 'get_data' script, which will aid the reproducibility of the final results.

In [1]:
import pandas as pd

In [2]:
# Our World in Data combined energy dataset, read straight from GitHub.
# The URL follows the repository's master branch rather than a pinned revision.
OWID_ENERGY_CSV_URL = "https://github.com/owid/energy-data/raw/master/owid-energy-data.csv"
df = pd.read_csv(OWID_ENERGY_CSV_URL)

In [3]:
# Summary statistics for the numeric columns.
# NOTE(review): in the saved output below, energy_cons_change_pct has a mean and
# max of inf, and gas_prod_change_pct has a maximum around 9e17 — these values
# presumably need cleaning before the percentage-change columns are used; confirm.
df.describe()

Unnamed: 0,year,coal_prod_change_pct,coal_prod_change_twh,gas_prod_change_pct,gas_prod_change_twh,oil_prod_change_pct,oil_prod_change_twh,energy_cons_change_pct,energy_cons_change_twh,biofuel_share_elec,...,solar_elec_per_capita,solar_energy_per_capita,gdp,wind_share_elec,wind_cons_change_pct,wind_share_energy,wind_cons_change_twh,wind_consumption,wind_elec_per_capita,wind_energy_per_capita
count,17432.0,7445.0,10394.0,4862.0,7893.0,6521.0,8565.0,9842.0,9892.0,4206.0,...,5499.0,4290.0,10456.0,6076.0,1543.0,4284.0,4207.0,4290.0,5499.0,4290.0
mean,1973.094367,20.830774,8.798102,192162300000000.0,14.369018,18.24219,18.033792,inf,36.853247,1.549155,...,14.776571,29.375128,541783300000.0,1.006011,313.478014,0.345406,2.16383,15.080935,53.625783,134.003056
std,34.333995,697.178744,135.503698,1.33991e+16,85.415649,335.492675,169.610587,,267.425811,3.544546,...,58.750432,127.852638,4083842000000.0,3.68055,6836.857935,1.337962,17.013149,126.674141,209.543489,513.368047
min,1900.0,-100.0,-2326.87,-100.0,-1054.32,-100.0,-2239.737,-92.632,-6083.407,0.0,...,0.0,0.0,196308000.0,0.0,-100.0,0.0,-10.409,0.0,0.0,0.0
25%,1946.0,-1.532,0.0,0.0,0.0,-1.429,0.0,-0.43075,-0.044,0.0,...,0.0,0.0,12538040000.0,0.0,2.1595,0.0,0.0,0.0,0.0,0.0
50%,1983.0,0.0,0.0,2.5835,0.0,0.278,0.0,2.567,0.7355,0.0215,...,0.0,0.0,42816490000.0,0.0,19.749,0.0,0.0,0.0,0.0,0.0
75%,2002.0,7.69,0.334,9.7035,2.559,9.091,3.222,6.8765,10.32825,1.36275,...,0.794,0.2955,175000000000.0,0.10825,50.0,0.02,0.01,0.141,3.0465,4.74475
max,2020.0,44965.754,3060.593,9.34293e+17,2112.975,25500.0,2790.614,inf,6446.809,33.912,...,825.094,1763.675,107000000000000.0,56.338,242384.843,20.66,428.736,3540.051,2825.425,6928.363


In [4]:
# Peek at five rows; random_state is fixed so the same rows are drawn on re-run
# (for the same downloaded data).
# NOTE(review): the saved output below includes a 2015 row for Czechoslovakia with
# no iso_code — worth checking before relying on iso_code or country as a key.
df.sample(5, random_state=5)

Unnamed: 0,iso_code,country,year,coal_prod_change_pct,coal_prod_change_twh,gas_prod_change_pct,gas_prod_change_twh,oil_prod_change_pct,oil_prod_change_twh,energy_cons_change_pct,...,solar_elec_per_capita,solar_energy_per_capita,gdp,wind_share_elec,wind_cons_change_pct,wind_share_energy,wind_cons_change_twh,wind_consumption,wind_elec_per_capita,wind_energy_per_capita
9332,MLI,Mali,1998,,0.0,,,,,2.297,...,,,9485876000.0,,,,,,,
16682,VEN,Venezuela,2014,-34.871,-3.639,3.915,11.977,0.529,8.462,-3.359,...,0.166,0.572,515000000000.0,0.071,,0.0,0.0,0.0,2.363,0.0
15105,TWN,Taiwan,1987,0.0,,,,,,7.444,...,0.0,0.0,307000000000.0,0.0,,0.0,0.0,0.0,0.0,0.0
4323,,Czechoslovakia,2015,0.0,,,,,,,...,,,467000000000.0,,,,,,,
4634,DMA,Dominica,2008,,0.0,,,,,-9.951,...,4.225,,677075000.0,0.0,,,,,0.0,


In [5]:
# Plain Python list of every column name, in the frame's column order.
col_list = df.columns.tolist()

In [6]:
# Distinct leading segments of the column names (the text before the first "_").
# Sorted so the list comes out in the same order on every kernel run: iterating a
# set of strings directly can yield a different order from one session to the next.
prefix_list = sorted({x.split("_")[0] for x in col_list})

In [7]:
# Leading name segments (text before the first "_") of the dataset-wide columns.
general_prefix_list = ['gdp', 'population', 'primary', 'year',
                       'energy', 'per', 'country', 'iso', 'electricity']

# Leading name segments of the per-source columns.
breakdown_prefix_list = ['nuclear', 'coal', 'solar', 'biofuel',
                         'wind', 'fossil', 'gas', 'hydro', 'other',
                         'renewables', 'oil', 'low']

# Split the column names into the two groups by their leading segment.
general_col_list = [x for x in col_list if x.split("_")[0] in general_prefix_list]
breakdown_col_list = [x for x in col_list if x.split("_")[0] in breakdown_prefix_list]

# Compatibility: this cell has always left `breakdown_prefix_list` bound to the
# filtered column names (not the prefixes), and a later cell displays it under
# that name. Keep that binding, but prefer `breakdown_col_list` in new code.
breakdown_prefix_list = breakdown_col_list

In [8]:
# Columns whose leading name segment is one of the general prefixes.
general_col_list

['iso_code',
 'country',
 'year',
 'energy_cons_change_pct',
 'energy_cons_change_twh',
 'electricity_generation',
 'energy_per_gdp',
 'energy_per_capita',
 'per_capita_electricity',
 'population',
 'primary_energy_consumption',
 'gdp']

In [9]:
# NOTE: despite its name, this holds column names rather than prefixes — the cell
# that defines the prefix lists rebinds it to the filtered column list.
breakdown_prefix_list

['coal_prod_change_pct',
 'coal_prod_change_twh',
 'gas_prod_change_pct',
 'gas_prod_change_twh',
 'oil_prod_change_pct',
 'oil_prod_change_twh',
 'biofuel_share_elec',
 'biofuel_elec_per_capita',
 'biofuel_cons_change_pct',
 'biofuel_share_energy',
 'biofuel_cons_change_twh',
 'biofuel_consumption',
 'biofuel_cons_per_capita',
 'coal_share_elec',
 'coal_cons_change_pct',
 'coal_share_energy',
 'coal_cons_change_twh',
 'coal_consumption',
 'coal_elec_per_capita',
 'coal_cons_per_capita',
 'coal_production',
 'coal_prod_per_capita',
 'biofuel_electricity',
 'coal_electricity',
 'fossil_electricity',
 'gas_electricity',
 'hydro_electricity',
 'nuclear_electricity',
 'oil_electricity',
 'other_renewable_electricity',
 'other_renewable_exc_biofuel_electricity',
 'renewables_electricity',
 'solar_electricity',
 'wind_electricity',
 'fossil_cons_change_pct',
 'fossil_share_energy',
 'fossil_cons_change_twh',
 'fossil_fuel_consumption',
 'fossil_energy_per_capita',
 'fossil_cons_per_capita',
