# Merging all datasets

We have the following data:
- Google data — monthly
- Independent-variable data — yearly
- World dependent-variable data — yearly
- MENA dependent-variable data — monthly

## what this code does:
(1) Combines the monthly Google data with the monthly MENA data, using a monthly version of the yearly independent-variable data (obtained with some form of moving average), and

(2) Averages the Google data over each year and merges the result with the yearly independent-variable data and the world data.

In [5]:
import numpy as np
import pandas as pd
import csv

In [6]:
# Load every input CSV into `dfs`, in the order given by `file_names`.
# Later cells address the frames by position, so the order must not change:
#   dfs[0] Google Trends, monthly      dfs[1] Google Trends, yearly
#   dfs[2] independent vars, monthly   dfs[3] independent vars, yearly
#   dfs[4] MENA dependent variable     dfs[5] world dependent variable
df_final = pd.DataFrame()

file_names = [
    'Google Trends data/google_all_data_monthly.csv',
    'Google Trends data/google_all_data_yearly.csv',
    'Independent variable - WB/df_monthly.csv',
    'Independent variable - WB/full_dataset_WB_edited.csv',
    'Final dependent variable/mena_data.csv',
    'Final dependent variable/world_data.csv',
]

# pd.read_csv opens the path itself and treats the first row as the header,
# so no separate open()/next() is needed. The previous extra file handle was
# never passed to pandas and had no effect on the parsed data.
dfs = [pd.read_csv(file_name) for file_name in file_names]

In [7]:
# Build the monthly MENA dataset:
#   Google Trends (dfs[0]) + monthly independent variables (dfs[2])
#   + MENA dependent-variable frame (dfs[4]).

# Give the MENA frame a string 'date' key built from its year and month
# columns so it can be joined on 'date'. This modifies dfs[4] in place
# ('year' and 'month' become strings, 'date' is added).
mena = dfs[4]
mena['year'] = mena['year'].astype(str)
mena['month'] = mena['month'].astype(str)
mena['date'] = pd.to_datetime(
    mena['year'] + '-' + mena['month'],
    format='%Y-%m',
).astype(str)

# Inner joins: first attach the independent variables to the Google data,
# then attach the MENA frame on (country code, date).
merged_dfs_MENA = dfs[0].merge(
    dfs[2],
    how="inner",
    left_on=["country_code", "date"],
    right_on=["economy", "Time"],
).merge(
    mena,
    how="inner",
    left_on=["country_code", "date"],
    right_on=["iso3", "date"],
)

# The row index is written as well (default to_csv behaviour).
merged_dfs_MENA.to_csv(r'merged_dfs_MENA.csv')
merged_dfs_MENA

Unnamed: 0.1,Unnamed: 0_x,code,protest,revolution,riots,strike,unrest,violence,date,geo_x,...,ST.INT.ARVL,ST.INT.DPRT,geo_y,economy,Unnamed: 0,iso3,country_y,year,month,unrest_index
0,4176,2016-01-01/BH,1.0,11.0,7.0,10.0,0.0,0.0,2016-01-01,BH,...,9.995833e+06,,BH,BHR,888,BHR,Bahrain,2016,1,0.422272
1,4177,2016-02-01/BH,0.0,18.0,3.0,13.0,0.0,8.0,2016-02-01,BH,...,9.930667e+06,,BH,BHR,902,BHR,Bahrain,2016,2,0.578260
2,4178,2016-03-01/BH,2.0,11.0,7.0,9.0,0.0,9.0,2016-03-01,BH,...,9.865500e+06,,BH,BHR,917,BHR,Bahrain,2016,3,0.128619
3,4179,2016-04-01/BH,2.0,16.0,0.0,16.0,0.0,8.0,2016-04-01,BH,...,9.800333e+06,,BH,BHR,932,BHR,Bahrain,2016,4,0.287029
4,4180,2016-05-01/BH,0.0,16.0,6.0,12.0,0.0,3.0,2016-05-01,BH,...,9.735167e+06,,BH,BHR,945,BHR,Bahrain,2016,5,0.101238
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094,38582,2019-03-01/TN,0.0,13.0,0.0,7.0,,11.0,2019-03-01,TN,...,7.987250e+06,2.343750e+06,TN,TUN,1449,TUN,Tunisia,2019,3,0.278695
1095,38583,2019-04-01/TN,1.0,11.0,0.0,5.0,,8.0,2019-04-01,TN,...,8.091167e+06,2.400833e+06,TN,TUN,1465,TUN,Tunisia,2019,4,0.369249
1096,38584,2019-05-01/TN,0.5,11.0,0.0,6.0,,5.0,2019-05-01,TN,...,8.195083e+06,2.457917e+06,TN,TUN,1481,TUN,Tunisia,2019,5,0.145655
1097,38585,2019-06-01/TN,0.0,10.0,1.0,7.0,,5.0,2019-06-01,TN,...,8.299000e+06,2.515000e+06,TN,TUN,1497,TUN,Tunisia,2019,6,0.278638


In [8]:
# Build the yearly world dataset:
#   Google Trends (dfs[1]) + yearly independent variables (dfs[3])
#   + world dependent-variable frame (dfs[5]).

# Give the world frame a string 'date' key built from its year column so it
# can be joined on 'date'. This modifies dfs[5] in place ('year' becomes a
# string, 'date' is added).
world = dfs[5]
world['year'] = world['year'].astype(str)
world['date'] = pd.to_datetime(world['year'], format='%Y').astype(str)

# Inner joins: first attach the independent variables to the Google data,
# then attach the world frame on (country code, date).
merged_dfs_world = dfs[1].merge(
    dfs[3],
    how="inner",
    left_on=["country_code", "date"],
    right_on=["economy", "Time"],
).merge(
    world,
    how="inner",
    left_on=["country_code", "date"],
    right_on=["iso3", "date"],
)

# The row index is written as well (default to_csv behaviour).
merged_dfs_world.to_csv(r'merged_dfs_world.csv')
merged_dfs_world

Unnamed: 0.1,Unnamed: 0_x,country_code,date,protest,revolution,riots,strike,unrest,violence,Unnamed: 0_y,...,SP.URB.TOTL.IN.ZS,ST.INT.ARVL,ST.INT.DPRT,geo,economy,Unnamed: 0,iso3,country,year,unrest_index
0,14,ABW,2019-01-01,0.250000,2.083333,0.416667,0.833333,,0.416667,3520,...,43.546,1951000.0,,AW,ABW,1095,ABW,Aruba,2019,0.000168
1,28,AFG,2017-01-01,0.083333,0.416667,0.208333,0.791667,,0.500000,3675,...,25.250,,,AF,AFG,885,AFG,Afghanistan,2017,9.142620
2,29,AFG,2018-01-01,0.208333,0.291667,0.041667,0.958333,,0.541667,3674,...,25.495,,,AF,AFG,960,AFG,Afghanistan,2018,11.513111
3,30,AFG,2019-01-01,0.208333,0.500000,0.166667,0.875000,,0.750000,3673,...,25.754,,,AF,AFG,1096,AFG,Afghanistan,2019,12.389551
4,32,AGO,2005-01-01,16.000000,15.250000,,12.416667,,0.000000,3602,...,56.000,210000.0,,AO,AGO,235,AGO,Angola,2005,0.000897
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
773,3562,ZWE,2015-01-01,0.250000,8.583333,0.500000,2.916667,0.125000,5.916667,5,...,32.385,2057000.0,3393000.0,ZW,ZWE,813,ZWE,Zimbabwe,2015,0.035166
774,3563,ZWE,2016-01-01,0.500000,8.250000,0.416667,3.416667,0.208333,6.166667,4,...,32.296,2168000.0,3192000.0,ZW,ZWE,884,ZWE,Zimbabwe,2016,0.088278
775,3564,ZWE,2017-01-01,1.583333,8.500000,2.916667,4.250000,0.333333,8.666667,3,...,32.237,2423000.0,2768000.0,ZW,ZWE,959,ZWE,Zimbabwe,2017,0.098554
776,3565,ZWE,2018-01-01,0.791667,6.583333,0.750000,3.166667,0.458333,6.916667,2,...,32.209,2580000.0,2288000.0,ZW,ZWE,1094,ZWE,Zimbabwe,2018,0.088772
