# Merging all datasets

We have the following data:
- Google Trends data — monthly
- Independent-variable (World Bank) data — yearly
- World dependent-variable data — yearly
- MENA dependent-variable data — monthly

## What this code does
(1) Combine the monthly Google data with the monthly MENA data, using a moving average of some sort to bring the yearly indep data to monthly frequency; and

(2) Average the Google data within each year and merge these yearly averages with the indep data and the world data.

In [139]:
import numpy as np
import pandas as pd
import csv

In [140]:
# Load every input CSV into `dfs`; the list position of each frame is the
# index used to refer to it (dfs[0] ... dfs[5]).
df_final = pd.DataFrame()

file_names = [
    'Google Trends data/google_all_data_monthly.csv',        # dfs[0]
    'Google Trends data/google_all_data_yearly.csv',         # dfs[1]
    'Independent variable - WB/df_monthly.csv',              # dfs[2]
    'Independent variable - WB/full_dataset_WB_edited.csv',  # dfs[3]
    'Final dependent variable/mena_data.csv',                # dfs[4]
    'Final dependent variable/world_data.csv',               # dfs[5]
]

# pd.read_csv opens, parses (including the header row) and closes each file
# on its own, so no separate open()/next() on the path is needed.
dfs = []
for file_name in file_names:
    dfs.append(pd.read_csv(file_name))

In [141]:
# Monthly (MENA) pipeline:
#   dfs[0] (Google monthly)  +  dfs[2] (World Bank monthly)  +  dfs[4] (MENA data)
# All joins are inner joins, so only country/month pairs present in every
# source survive.

# The MENA frame has separate 'year' and 'month' columns; turn them into a
# single 'date' string so it can be joined against the Google 'date' column.
# Both columns are left as strings on dfs[4] afterwards.
dfs[4]['year'] = dfs[4]['year'].astype(str)
dfs[4]['month'] = dfs[4]['month'].astype(str)
dfs[4]['date'] = pd.to_datetime(
    dfs[4]['year'].str.cat(dfs[4]['month'], sep='-'),
    format='%Y-%m',
).astype(str)

# First attach the World Bank indicators to the Google data, then attach the
# MENA frame (which carries the unrest index) on country code and date.
merged_dfs_MENA = dfs[0].merge(
    dfs[2],
    how="inner",
    left_on=["country_code", "date"],
    right_on=["economy", "Time"],
).merge(
    dfs[4],
    how="inner",
    left_on=["country_code", "date"],
    right_on=["iso3", "date"],
)

# NOTE(review): the frame's index is written as the first CSV column; reading
# this file back with a plain pd.read_csv will add an 'Unnamed: 0' column.
merged_dfs_MENA.to_csv(r'merged_dfs_MENA.csv')
merged_dfs_MENA

Unnamed: 0.1,Unnamed: 0_x,date,hits,keyword,geo_x,month_x,year_x,country_x,country_code,Unnamed: 0_y,...,ST.INT.ARVL,ST.INT.DPRT,geo_y,economy,Unnamed: 0,iso3,country_y,year_y,month_y,unrest_index
0,2306,2004-07-01,0.0,protest,DZ,7,2004,Algeria,DZA,386,...,,,DZ,DZA,195,DZA,Algeria,2004,7,0.184742
1,2536,2004-07-01,29.0,revolution,DZ,7,2004,Algeria,DZA,386,...,,,DZ,DZA,195,DZA,Algeria,2004,7,0.184742
2,2766,2004-07-01,0.0,riots,DZ,7,2004,Algeria,DZA,386,...,,,DZ,DZA,195,DZA,Algeria,2004,7,0.184742
3,2996,2004-07-01,100.0,strike,DZ,7,2004,Algeria,DZA,386,...,,,DZ,DZA,195,DZA,Algeria,2004,7,0.184742
4,3226,2004-07-01,0.0,violence,DZ,7,2004,Algeria,DZA,386,...,,,DZ,DZA,195,DZA,Algeria,2004,7,0.184742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6015,210636,2019-07-01,1.0,protest,TN,7,2019,Tunisia,TUN,38201,...,8.393167e+06,2.537167e+06,TN,TUN,1512,TUN,Tunisia,2019,7,0.360317
6016,210866,2019-07-01,11.0,revolution,TN,7,2019,Tunisia,TUN,38201,...,8.393167e+06,2.537167e+06,TN,TUN,1512,TUN,Tunisia,2019,7,0.360317
6017,211096,2019-07-01,1.0,riots,TN,7,2019,Tunisia,TUN,38201,...,8.393167e+06,2.537167e+06,TN,TUN,1512,TUN,Tunisia,2019,7,0.360317
6018,211326,2019-07-01,4.0,strike,TN,7,2019,Tunisia,TUN,38201,...,8.393167e+06,2.537167e+06,TN,TUN,1512,TUN,Tunisia,2019,7,0.360317


In [142]:
# Yearly (world) pipeline:
#   dfs[1] (Google yearly)  +  dfs[3] (World Bank yearly)  +  dfs[5] (world data)
# All joins are inner joins, so only country/year pairs present in every
# source survive.

# The world frame only has a 'year' column; convert it to a 'date' string so
# it can be joined against the Google 'date' column. 'year' is left as a
# string on dfs[5] afterwards.
dfs[5]['year'] = dfs[5]['year'].astype(str)
dfs[5]['date'] = pd.to_datetime(dfs[5]['year'], format='%Y').astype(str)

# First attach the World Bank indicators to the Google data, then attach the
# world frame (which carries the unrest index) on country code and date.
merged_dfs_world = dfs[1].merge(
    dfs[3],
    how="inner",
    left_on=["country_code", "date"],
    right_on=["economy", "Time"],
).merge(
    dfs[5],
    how="inner",
    left_on=["country_code", "date"],
    right_on=["iso3", "date"],
)

# NOTE(review): the frame's index is written as the first CSV column; reading
# this file back with a plain pd.read_csv will add an 'Unnamed: 0' column.
merged_dfs_world.to_csv(r'merged_dfs_world.csv')
merged_dfs_world

Unnamed: 0.1,Unnamed: 0_x,country_code,date,hits,Unnamed: 0_y,Country,Time,BN.CAB.XOKA.GD.ZS,BX.KLT.DINV.WD.GD.ZS,EG.CFT.ACCS.ZS,...,SP.URB.TOTL.IN.ZS,ST.INT.ARVL,ST.INT.DPRT,geo,economy,Unnamed: 0,iso3,country,year,unrest_index
0,14,ABW,2019-01-01,0.800000,3520,Aruba,2019-01-01,2.516995,-2.232924,,...,43.546,1951000.0,,AW,ABW,1095,ABW,Aruba,2019,0.000168
1,28,AFG,2017-01-01,0.400000,3675,Afghanistan,2017-01-01,-18.812615,0.272719,29.7,...,25.250,,,AF,AFG,885,AFG,Afghanistan,2017,9.142620
2,29,AFG,2018-01-01,0.408333,3674,Afghanistan,2018-01-01,-21.156793,0.648440,30.9,...,25.495,,,AF,AFG,960,AFG,Afghanistan,2018,11.513111
3,30,AFG,2019-01-01,0.500000,3673,Afghanistan,2019-01-01,-20.058384,0.123804,31.9,...,25.754,,,AF,AFG,1096,AFG,Afghanistan,2019,12.389551
4,32,AGO,2005-01-01,10.916667,3602,Angola,2005-01-01,13.897092,-3.526655,42.6,...,56.000,210000.0,,AO,AGO,235,AGO,Angola,2005,0.000897
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
773,3562,ZWE,2015-01-01,3.048611,5,Zimbabwe,2015-01-01,-7.998095,1.999687,29.5,...,32.385,2057000.0,3393000.0,ZW,ZWE,813,ZWE,Zimbabwe,2015,0.035166
774,3563,ZWE,2016-01-01,3.159722,4,Zimbabwe,2016-01-01,-3.394138,1.669274,29.8,...,32.296,2168000.0,3192000.0,ZW,ZWE,884,ZWE,Zimbabwe,2016,0.088278
775,3564,ZWE,2017-01-01,4.375000,3,Zimbabwe,2017-01-01,-1.542184,1.746885,29.8,...,32.237,2423000.0,2768000.0,ZW,ZWE,959,ZWE,Zimbabwe,2017,0.098554
776,3565,ZWE,2018-01-01,3.111111,2,Zimbabwe,2018-01-01,-4.039183,2.101721,29.9,...,32.209,2580000.0,2288000.0,ZW,ZWE,1094,ZWE,Zimbabwe,2018,0.088772
