In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

In [2]:
# Load the four source tables, dropping the columns this analysis does not use as each one is read.
# "Unnamed: 0" is presumably the row index stored when the CSVs were written — TODO confirm.

# Net monthly exports: drop the stored index and the two notes columns
Exportaciones_netas = (
    pd.read_csv("../csv_results/Exportaciones_Mensuales_Netas.csv")
    .drop(columns=["Unnamed: 0", "Exp_notes", "Imp_notes"])
)

# Geopolitical incidents: only the stored index is dropped
Incidentes_geopoliticos = (
    pd.read_csv('../csv_results/incidentes_geopoliticos.csv')
    .drop(columns=["Unnamed: 0"])
)

# Terrorism: drop the stored index and the descriptive columns that are not aggregated later
Terrorismo = (
    pd.read_csv("../csv_results/Terrorism.csv")
    .drop(columns=[
        "Unnamed: 0",
        "imonth",
        "approxdate",
        "targtype1",
        "targsubtype1",
        "doubtterr",
        "alternative",
        "propextent",
    ])
)

# Monthly value variations: kept as read (its "Unnamed: 0" column is removed after the per-period sum)
Variaciones_valor = pd.read_csv("../csv_results/Variaciones_Valor_Mensual.csv")

In [3]:
### Definitive country names
# Names that are spelled differently across the sources carry a variant in parentheses
countries = [
    'Albania', 'Algeria', 'Angola', 'Argentina', 'Azerbaijan',
    'Benin', 'Bolivia', 'Brazil', 'Brunei', 'Cameroon',
    'Canada', 'China', 'Colombia', 'Republic of the Congo', 'Democratic Republic of the Congo',
    'Denmark', 'Ecuador', 'Egypt', 'Estonia', 'Gabon',
    'Georgia', 'Ghana', 'Guatemala', 'Indonesia', 'Iran',
    'Iraq', 'Kazakhstan', 'Kuwait', 'Kyrgyzstan', 'Libya',
    'Malaysia', 'Mexico', 'Mongolia', 'Myanmar (Burma)', 'Nigeria',
    'Norway', 'Oman', 'Qatar', 'Russia (Soviet Union)', 'Saudi Arabia',
    'South Sudan', 'Sudan', 'Trinidad and Tobago', 'Tunisia', 'Turkmenistan',
    'United Arab Emirates', 'United Kingdom', 'Venezuela', 'Vietnam', 'Yemen (North Yemen)',
]

#### Find the country names that need to be replaced
# Distinct country names found in each table, keyed by a short table alias
db_countries = {
    alias: frame[column].unique().tolist()
    for alias, frame, column in [
        ("var", Variaciones_valor, "Country"),
        ("exp", Exportaciones_netas, "Country"),
        ("inc", Incidentes_geopoliticos, "country"),
        ("ter", Terrorismo, "country_txt"),
    ]
}

# For each table, keep only the names that are missing from the definitive list
to_change = {
    alias: [name for name in found if name not in countries]
    for alias, found in db_countries.items()
}

# Display the result to see which names have to be mapped
to_change

{'var': ['Burma', 'Congo-Brazzaville', 'Congo-Kinshasa', 'Russia', 'Yemen'],
 'exp': ['Brunei Darussalam',
  'Bolivia, Plurinational State of',
  'Iran, Islamic Republic of',
  'Myanmar',
  'Russian Federation',
  'Venezuela, Bolivarian Republic of',
  'Viet Nam',
  'Yemen'],
 'inc': ['Congo', 'DR Congo (Zaire)'],
 'ter': ['Myanmar', 'Yemen', 'Russia']}

In [4]:
# Harmonise the country names of every table with the definitive `countries` list.
# Each mapping goes from the spelling used in that table to the definitive spelling.

# Net exports
exportaciones_renames = {
    'Brunei Darussalam': 'Brunei',
    'Bolivia, Plurinational State of': 'Bolivia',
    'Iran, Islamic Republic of': 'Iran',
    'Myanmar': 'Myanmar (Burma)',
    'Russian Federation': 'Russia (Soviet Union)',
    'Venezuela, Bolivarian Republic of': 'Venezuela',
    'Viet Nam': 'Vietnam',
    'Yemen': 'Yemen (North Yemen)',
}
Exportaciones_netas["Country"] = Exportaciones_netas["Country"].replace(exportaciones_renames)

# Geopolitical incidents
incidentes_renames = {
    'Congo': 'Republic of the Congo',
    'DR Congo (Zaire)': 'Democratic Republic of the Congo',
}
Incidentes_geopoliticos["country"] = Incidentes_geopoliticos["country"].replace(incidentes_renames)

# Terrorism
terrorismo_renames = {
    'Myanmar': 'Myanmar (Burma)',
    'Yemen': 'Yemen (North Yemen)',
    'Russia': 'Russia (Soviet Union)',
    'South Vietnam': 'Vietnam',
}
Terrorismo["country_txt"] = Terrorismo["country_txt"].replace(terrorismo_renames)

# Value variations
variaciones_renames = {
    'Burma': 'Myanmar (Burma)',
    'Congo-Brazzaville': 'Republic of the Congo',
    'Congo-Kinshasa': 'Democratic Republic of the Congo',
    'Russia': 'Russia (Soviet Union)',
    'Yemen': 'Yemen (North Yemen)',
}
Variaciones_valor["Country"] = Variaciones_valor["Country"].replace(variaciones_renames)

In [5]:
# Create a version of each table aggregated by period

# Terrorism: per-period sum of successful attacks, people killed, people wounded and
# property damage value.
# The columns are selected with a list (double brackets): selecting several groupby columns
# with a bare tuple was deprecated in pandas 1.0 and raises an error in pandas 2.x.
g_terrorismo = Terrorismo.groupby("period")[["success", "nkill", "nwound", "propvalue"]].sum()

# Value variations: per-period sum of every numeric column.
# numeric_only=True keeps text columns such as "Country" out of the result; without it,
# pandas 2.x would sum (concatenate) the strings instead of dropping them.
# The summed "Unnamed: 0" column is meaningless, so it is removed.
g_valores = (
    Variaciones_valor
    .groupby("Period")
    .sum(numeric_only=True)
    .drop(columns=["Unnamed: 0"])
)

# Incidents: how many incidents started in the period and the total of their "best" values
# (labelled here as victims)
g_incidentes = Incidentes_geopoliticos.groupby("Start_month").agg(
    Incidents=("best", "count"),
    Victims=("best", "sum"),
)

# Net exports: per-period sum of the "Net_Exports" column
g_exportaciones = Exportaciones_netas.groupby("Month")["Net_Exports"].sum()

In [6]:
# Join all the per-period tables (values, net exports, incidents, terrorism) side by side,
# aligning them on their period index with an outer join.
# NOTE(review): the original header mentioned "(2002/2018)", but the filter below keeps every
# period that has Crude data and the preview of the result starts at 1973-01 — confirm the intended range.
dataframes = [g_valores, g_exportaciones, g_incidentes, g_terrorismo]
unify_data = pd.concat(dataframes, join='outer', axis=1)

# Keep only the periods with crude data, then turn the period index into a sorted column.
# The four inputs have differently named indexes ("Period", "Month", "Start_month", "period"),
# so the shared index is named explicitly instead of relying on reset_index() falling back
# to a column called "index".
unify_data = (
    unify_data[unify_data["Crude"].notna()]
    .rename_axis("Period")
    .reset_index()
    .sort_values("Period")
)

# Give the terrorism columns more readable names
unify_data = unify_data.rename(columns={
    'success': "Terrorism_at",
    "nkill": 'Kills',
    "nwound": 'Wound',
    "propvalue": 'Pro_Damage',
})

In [12]:
# Preview the first five rows of the unified monthly table
unify_data.head(n=5)

Unnamed: 0,Period,Crude,NGPL,Others,Refinery,Net_Exports,Incidents,Victims,Terrorism_at,Kills,Wound,Pro_Damage
0,1973-01,32089.771,1110.827649,88.785,206.057087,,,,,,,
1,1973-02,32355.928,1143.809629,88.785,206.057087,,,,,,,
2,1973-03,32456.575,1163.822474,88.785,206.057087,,,,,,,
3,1973-04,32328.209,1164.287773,88.785,206.057087,,,,,,,
4,1973-05,33456.141,1194.923457,124.285,214.619584,,,,,,,


In [13]:
# Export the unified monthly table to an Excel workbook.
# With the default settings the DataFrame index is written as the first column.
monthly_report_path = "../Excel_reports/Monthly_data.xlsx"
unify_data.to_excel(monthly_report_path)

# Next steps:
### DataFrames Globales
- Export to excel and play with pivot tables
- How to compare counts with money in petroleum

### DataFrames por paises
- Pendiente de cómo se va a hacer

