# Merging data from Our World in Data (OWID) and the World Bank



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import altair as alt
import math
import os

In [2]:
# Show which files are available in the output folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# displayed listing reproducible across machines and re-runs.
sorted(os.listdir('output'))

['owid_china_impute.csv',
 'owid_india_impute.csv',
 'owid_us_impute.csv',
 'worldbank_china_impute.csv',
 'worldbank_india_impute.csv',
 'worldbank_us_impute.csv']

In [3]:
def merge_owid_worldbank_data(country):
    """Merge the imputed OWID and World Bank tables for one country.

    Reads ``output/owid_{country}_impute.csv`` and
    ``output/worldbank_{country}_impute.csv``, left-joins the World Bank
    columns onto the OWID rows by ``year``, writes the result to
    ``output/merged_{country}.csv`` (without the index) and returns it.

    Parameters
    ----------
    country : str
        Country key used in the file names (e.g. ``'china'``, ``'us'``).
        It is lower-cased before the paths are built.

    Returns
    -------
    pandas.DataFrame
        One row per OWID row. When both sources carry ``iso_code`` and
        ``country``, a single copy of each is kept in the position of the
        World Bank column; its values come from the World Bank table and
        fall back to the OWID values where the World Bank has no row for
        that year.
    """
    country = country.lower()  # the files in output/ use lower-case country keys

    df_owid = pd.read_csv(f'output/owid_{country}_impute.csv')
    df_worldbank = pd.read_csv(f'output/worldbank_{country}_impute.csv')

    df_country = df_owid.merge(df_worldbank, how='left', on='year')

    # When both sources carry an identifier column, the merge suffixes them
    # with _x (OWID) and _y (World Bank). Because this is a left join, the _y
    # copy is NaN for any OWID year missing from the World Bank table, so fill
    # those gaps from the OWID copy before discarding it.
    for col in ('iso_code', 'country'):
        owid_col, wb_col = f'{col}_x', f'{col}_y'
        if owid_col not in df_country.columns or wb_col not in df_country.columns:
            # Only one source had this column, so it was never suffixed.
            continue
        wb_values = df_country[wb_col]
        df_country[wb_col] = wb_values.where(wb_values.notna(), df_country[owid_col])
        df_country = df_country.drop(columns=owid_col).rename(columns={wb_col: col})

    # save merged dfs
    df_country.to_csv(f'output/merged_{country}.csv', index=False)

    return df_country

In [4]:
# Build the merged China table (the call also writes output/merged_china.csv)
# and display it.
df_china = merge_owid_worldbank_data(country='china')
df_china

Unnamed: 0,year,co2,co2_growth_prct,co2_growth_abs,co2_per_capita,share_global_co2,cumulative_co2,share_global_cumulative_co2,co2_per_gdp,coal_co2,...,Renewable electricity output (% of total electricity output),Renewable energy consumption (% of total final energy consumption),"School enrollment, primary and secondary (gross), gender parity index (GPI)",Total greenhouse gas emissions (% change from 1990),Total greenhouse gas emissions (kt of CO2 equivalent),Urban population,Urban population (% of total population),Urban population growth (annual %),iso_code,country
0,2001,3514.934,2.2,75.782,2.706,13.81,78398.967,7.32,0.555,2428.857,...,18.958519,28.334999,0.96462,32.184715,4791440.0,471767321.0,37.093,4.059566,CHN,China
1,2002,3872.595,10.18,357.662,2.962,14.87,82271.562,7.5,0.568,2778.77,...,17.618628,26.978399,0.943115,40.646591,5100860.0,491993700.0,38.425,4.198001,CHN,China
2,2003,4545.014,17.36,672.419,3.456,16.61,86816.576,7.72,0.627,3319.28,...,15.03704,23.841499,1.001983,59.966148,5717660.0,512473984.0,39.776,4.078404,CHN,China
3,2004,5223.755,14.93,678.741,3.948,18.25,92040.331,7.99,0.667,3835.288,...,16.223266,20.160801,1.02237,82.821379,6467060.0,533257098.0,41.144,3.975372,CHN,China
4,2005,5876.555,12.5,652.801,4.416,19.85,97916.886,8.28,0.683,4424.39,...,16.175025,17.4408,1.053162,100.467167,7194480.0,554367818.0,42.522,3.882473,CHN,China
5,2006,6488.804,10.42,612.248,4.848,21.22,104405.69,8.61,0.684,4905.374,...,15.592783,16.385099,0.98804,120.244306,7879330.0,575118254.0,43.868,3.674728,CHN,China
6,2007,6978.612,7.55,489.808,5.185,22.16,111384.302,8.95,0.674,5289.904,...,15.263347,14.8837,0.99155,134.390133,8497040.0,595670841.0,45.199,3.511256,CHN,China
7,2008,7496.832,7.43,518.22,5.539,23.38,118881.134,9.31,0.694,5729.474,...,17.737444,14.1378,0.99697,157.458544,8786120.0,616481190.0,46.539,3.433958,CHN,China
8,2009,7886.533,5.2,389.701,5.794,24.95,126767.667,9.69,0.682,6028.5,...,17.863798,13.4319,1.00335,172.749831,9377660.0,637407288.0,47.88,3.338102,CHN,China
9,2010,8616.653,9.26,730.12,6.295,25.84,135384.319,10.09,0.67,6568.794,...,18.622826,12.2613,0.98895,187.303978,10203820.0,658498663.0,49.226,3.255365,CHN,China


In [5]:
# Build the merged US table (the call also writes output/merged_us.csv)
# and display it.
df_us = merge_owid_worldbank_data(country='us')
df_us

Unnamed: 0,year,co2,co2_growth_prct,co2_growth_abs,co2_per_capita,share_global_co2,cumulative_co2,share_global_cumulative_co2,co2_per_gdp,coal_co2,...,Renewable electricity output (% of total electricity output),Renewable energy consumption (% of total final energy consumption),"School enrollment, primary and secondary (gross), gender parity index (GPI)",Total greenhouse gas emissions (% change from 1990),Total greenhouse gas emissions (kt of CO2 equivalent),Urban population,Urban population (% of total population),Urban population growth (annual %),iso_code,country
0,2001,5904.882,-1.76,-105.627,20.747,23.2,310217.648,28.98,0.451,2115.059,...,6.783992,4.6787,0.782444,11.16577,6806970.0,225792302.0,79.234,1.21338,USA,United States
1,2002,5946.771,0.71,41.889,20.7,22.84,316164.418,28.83,0.446,2119.322,...,8.646361,4.8408,0.74823,13.782275,6646960.0,228400290.0,79.409,1.148419,USA,United States
2,2003,6011.837,1.09,65.067,20.744,21.97,322176.256,28.67,0.438,2158.577,...,8.889376,5.3263,0.886371,13.936582,6706390.0,230876596.0,79.583,1.07836,USA,United States
3,2004,6114.007,1.7,102.169,20.913,21.36,328290.263,28.48,0.43,2190.331,...,8.630186,5.4777,1.014797,18.059992,6787570.0,233532722.0,79.757,1.143885,USA,United States
4,2005,6134.521,0.34,20.515,20.795,20.72,334424.784,28.29,0.417,2214.838,...,8.578466,5.8412,1.00387,17.058327,6801820.0,236200507.0,79.928,1.135885,USA,United States
5,2006,6051.668,-1.35,-82.853,20.324,19.79,340476.452,28.08,0.401,2183.112,...,9.244631,6.3967,1.378431,13.98273,6707430.0,238999326.0,80.099,1.177968,USA,United States
6,2007,6131.004,1.31,79.336,20.395,19.47,346607.456,27.86,0.399,2208.712,...,8.367764,6.3042,1.634938,16.180624,6797860.0,241795278.0,80.269,1.163068,USA,United States
7,2008,5914.078,-3.54,-216.926,19.487,18.44,352521.534,27.62,0.386,2174.329,...,9.003541,6.8456,1.922989,8.358701,6619980.0,244607104.0,80.438,1.156186,USA,United States
8,2009,5478.21,-7.37,-435.868,17.885,17.33,357999.744,27.37,0.368,1905.943,...,10.291845,7.3544,2.321405,7.626602,6194450.0,247276259.0,80.606,1.08529,USA,United States
9,2010,5675.786,3.61,197.577,18.368,17.02,363675.53,27.12,0.372,2013.304,...,10.120355,7.4357,2.477131,9.40754,6442580.0,249849720.0,80.772,1.035345,USA,United States


In [6]:
# Build the merged India table (the call also writes output/merged_india.csv)
# and display it.
df_india = merge_owid_worldbank_data(country='india')
df_india

Unnamed: 0,year,co2,co2_growth_prct,co2_growth_abs,co2_per_capita,share_global_co2,cumulative_co2,share_global_cumulative_co2,co2_per_gdp,coal_co2,...,Renewable electricity output (% of total electricity output),Renewable energy consumption (% of total final energy consumption),"School enrollment, primary and secondary (gross), gender parity index (GPI)",Total greenhouse gas emissions (% change from 1990),Total greenhouse gas emissions (kt of CO2 equivalent),Urban population,Urban population (% of total population),Urban population growth (annual %),iso_code,country
0,2001,992.56,1.39,13.641,0.923,3.9,19932.284,1.86,0.342,582.912,...,13.207044,51.858501,0.80988,35.190929,1741320.0,300118526.0,27.918,2.631896,IND,India
1,2002,1023.027,3.07,30.467,0.936,3.93,20955.311,1.91,0.342,599.45,...,12.055161,50.808102,0.83037,38.196166,1763940.0,308796506.0,28.244,2.850502,IND,India
2,2003,1059.616,3.58,36.589,0.953,3.87,22014.927,1.96,0.33,626.782,...,13.469425,50.8298,0.9157,42.459946,1808910.0,317584393.0,28.572,2.806108,IND,India
3,2004,1125.471,6.21,65.855,0.996,3.93,23140.399,2.01,0.326,670.922,...,14.482501,50.0728,0.85204,50.668683,1894780.0,326495070.0,28.903,2.767126,IND,India
4,2005,1185.953,5.37,60.482,1.033,4.01,24326.352,2.06,0.32,722.497,...,16.619018,48.532398,0.89566,52.623872,1969680.0,335503761.0,29.235,2.721832,IND,India
5,2006,1259.744,6.22,73.791,1.081,4.12,25586.096,2.11,0.313,774.666,...,17.540425,46.063202,0.973918,60.934515,2065570.0,344622641.0,29.569,2.681686,IND,India
6,2007,1358.152,7.81,98.408,1.148,4.31,26944.249,2.17,0.312,843.677,...,17.864086,44.9034,0.94256,71.692385,2209240.0,353850624.0,29.906,2.642483,IND,India
7,2008,1462.815,7.71,104.662,1.218,4.56,28407.063,2.23,0.318,912.093,...,16.509834,43.405102,0.96705,75.77943,2309340.0,363154576.0,30.246,2.595371,IND,India
8,2009,1612.817,10.25,150.002,1.324,5.1,30019.88,2.3,0.326,985.349,...,15.660055,42.107498,0.98997,87.018352,2452660.0,372465918.0,30.587,2.531697,IND,India
9,2010,1677.888,4.03,65.071,1.359,5.03,31697.767,2.36,0.316,1017.376,...,16.043744,41.112801,0.98905,99.763086,2564130.0,381763164.0,30.93,2.465489,IND,India
