In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the four source tables and immediately discard the columns this
# notebook does not use. Every file carries an "Unnamed: 0" column
# (presumably a row index saved with the CSV -- TODO confirm).

# Net monthly exports: also drop the free-text note columns
Exportaciones_netas = pd.read_csv(
    "../csv_results/Exportaciones_Mensuales_Netas.csv"
).drop(columns=["Unnamed: 0", "Exp_notes", 'Imp_notes'])

# Geopolitical incidents
Incidentes_geopoliticos = pd.read_csv(
    '../csv_results/incidentes_geopoliticos.csv'
).drop(columns=["Unnamed: 0"])

# Terrorism events: drop the columns that are not used further down
Terrorismo = pd.read_csv("../csv_results/Terrorism.csv").drop(
    columns=[
        'Unnamed: 0',
        'imonth',
        'approxdate',
        'targtype1',
        'targsubtype1',
        'doubtterr',
        'alternative',
        'propextent',
    ]
)

# Monthly value variations
Variaciones_valor = pd.read_csv(
    "../csv_results/Variaciones_Valor_Mensual.csv"
).drop(columns=['Unnamed: 0'])

In [3]:
# Canonical country names shared by every dataset.
# Names that are spelled differently across sources carry the variant in
# parentheses.
countries = [
    'Albania', 'Algeria', 'Angola', 'Argentina', 'Azerbaijan',
    'Benin', 'Bolivia', 'Brazil', 'Brunei', 'Cameroon',
    'Canada', 'China', 'Colombia', 'Republic of the Congo',
    'Democratic Republic of the Congo', 'Denmark', 'Ecuador', 'Egypt',
    'Estonia', 'Gabon', 'Georgia', 'Ghana', 'Guatemala',
    'Indonesia', 'Iran', 'Iraq', 'Kazakhstan', 'Kuwait',
    'Kyrgyzstan', 'Libya', 'Malaysia', 'Mexico', 'Mongolia',
    'Myanmar (Burma)', 'Nigeria', 'Norway', 'Oman', 'Qatar',
    'Russia (Soviet Union)', 'Saudi Arabia', 'South Sudan', 'Sudan',
    'Trinidad and Tobago', 'Tunisia', 'Turkmenistan',
    'United Arab Emirates', 'United Kingdom', 'Venezuela', 'Vietnam',
    'Yemen (North Yemen)',
]

# Country column of each dataset, keyed by a short dataset tag
_country_columns = {
    "var": Variaciones_valor["Country"],
    "exp": Exportaciones_netas['Country'],
    "inc": Incidentes_geopoliticos['country'],
    "ter": Terrorismo['country_txt'],
}

# Distinct country names that appear in each dataset
db_countries = {
    tag: column.unique().tolist() for tag, column in _country_columns.items()
}

# For each dataset, the names that are not in the canonical list and
# therefore need to be replaced
to_change = {
    tag: [name for name in names if name not in countries]
    for tag, names in db_countries.items()
}

# Display the mismatches to see which names need a replacement
to_change

{'var': ['Burma', 'Congo-Brazzaville', 'Congo-Kinshasa', 'Russia', 'Yemen'],
 'exp': ['Brunei Darussalam',
  'Bolivia, Plurinational State of',
  'Iran, Islamic Republic of',
  'Myanmar',
  'Russian Federation',
  'Venezuela, Bolivarian Republic of',
  'Viet Nam',
  'Yemen'],
 'inc': ['Congo', 'DR Congo (Zaire)'],
 'ter': ['Myanmar', 'Yemen', 'Russia']}

In [4]:
# Harmonise country names: for every dataset, the frame, the name of its
# country column and the mapping from source spelling to canonical name.
_name_fixes = [
    # Net exports
    (
        Exportaciones_netas,
        "Country",
        {
            'Brunei Darussalam': 'Brunei',
            'Bolivia, Plurinational State of': 'Bolivia',
            'Iran, Islamic Republic of': 'Iran',
            'Myanmar': 'Myanmar (Burma)',
            'Russian Federation': 'Russia (Soviet Union)',
            'Venezuela, Bolivarian Republic of': 'Venezuela',
            'Viet Nam': 'Vietnam',
            'Yemen': 'Yemen (North Yemen)',
        },
    ),
    # Geopolitical incidents
    (
        Incidentes_geopoliticos,
        "country",
        {
            'Congo': 'Republic of the Congo',
            'DR Congo (Zaire)': 'Democratic Republic of the Congo',
        },
    ),
    # Terrorism
    (
        Terrorismo,
        "country_txt",
        {
            'Myanmar': 'Myanmar (Burma)',
            'Yemen': 'Yemen (North Yemen)',
            'Russia': 'Russia (Soviet Union)',
            'South Vietnam': 'Vietnam',
        },
    ),
    # Value variations
    (
        Variaciones_valor,
        "Country",
        {
            'Burma': 'Myanmar (Burma)',
            'Congo-Brazzaville': 'Republic of the Congo',
            'Congo-Kinshasa': 'Democratic Republic of the Congo',
            'Russia': 'Russia (Soviet Union)',
            'Yemen': 'Yemen (North Yemen)',
        },
    ),
]

# Apply each mapping to its country column; values without an entry in the
# mapping are left untouched.
for frame, column, mapping in _name_fixes:
    frame[column] = frame[column].replace(mapping)

In [5]:
# Create the version of each dataset that feeds the merge: keep only the
# selected columns and rename the keys to "Period" / "Country" so every
# table shares the same join columns. No aggregation happens in this cell;
# rows are kept as they are in the source tables.

# Terrorism: attack outcome, people killed and wounded, property damage
# value and coordinates.
# The rename runs BEFORE the column selection so the cell can be re-run
# safely: `Terrorismo` is overwritten here, and on a second run the old
# column names no longer exist. `rename` ignores missing labels by default,
# whereas selecting the old names would raise a KeyError.
_terrorism_columns = {
    'period': "Period",
    'country_txt': "Country",
    'targtype1_txt': "Targe_type",
    "success": "Success",
    "nkill": "Kills",
    "nwound": "Wounded",
    'propvalue': "Property_damage_value",
}
Terrorismo = Terrorismo.rename(columns=_terrorism_columns)[
    list(_terrorism_columns.values()) + ["latitude", "longitude"]
]

# Value variations: one value column per product category
Valores = Variaciones_valor[['Period', 'Country', 'Crude', 'NGPL', 'Others', 'Refinery']]
Valores = Valores.rename(columns={'Crude' : 'Value_Crude', 'NGPL' : 'Val_NGPL' , 'Others' : 'Val_Others', "Refinery" : 'Val_Refinery'})

# Geopolitical incidents: conflict name, victims ("best" column) and
# coordinates; the start month is used as the period
Incidentes = Incidentes_geopoliticos[['Start_month', 'country', 'conflict_name', 'best', "latitude", "longitude"]]
Incidentes = Incidentes.rename(columns={'Start_month' : "Period", 'country' : "Country", "best" : "Victims" })

# Net exports: exported, imported and net volumes per month
Exportaciones = Exportaciones_netas[[ "Month", "Country", 'Exports (kb/d)', 'Imports (kb/d)', 'Net_Exports']]
Exportaciones = Exportaciones.rename(columns={"Month" : "Period"})

In [6]:
# Show the column labels of Exportaciones_netas
Exportaciones_netas.columns

Index(['Country', 'Month', 'Exports (kb/d)', 'Imports (kb/d)', 'Net_Exports'], dtype='object')

In [23]:
# Frames to combine, in merge order
dbs = [Exportaciones, Valores, Incidentes, Terrorismo]

# Fold the frames into a single table with successive outer joins on
# (Period, Country), so keys present in any one dataset are kept.
# NOTE(review): (Period, Country) is not unique in Incidentes and Terrorismo
# (several events can share a month and country), so matching rows are
# paired many-to-many and values from the other tables are repeated.
# Both frames also carry latitude/longitude, which pandas disambiguates
# with its default "_x" / "_y" suffixes.
tableau = dbs[0]
for frame in dbs[1:]:
    tableau = tableau.merge(frame, on=['Period', 'Country'], how="outer")

# Parse the period strings into datetimes
tableau["Period"] = pd.to_datetime(tableau["Period"])


# Next Steps

- Probar la mejor manera de hacer un merge de cada una de las bases de datos

In [31]:
# Distinct property-damage values in the merged table, in order of first
# appearance (missing values show up as a single nan entry)
damage = pd.unique(tableau["Property_damage_value"]).tolist()
damage

[nan,
 -99.0,
 6400000.0,
 100000.0,
 50000.0,
 400000.0,
 80000.0,
 160000.0,
 180000.0,
 30000.0,
 200000.0,
 300.0,
 20000.0,
 1000000.0,
 6000000.0,
 300000.0,
 70000.0,
 575000.0,
 45000.0,
 370000.0,
 106000.0,
 120000.0,
 216000.0,
 500000.0,
 600000.0,
 54000.0,
 62000.0,
 21400000.0,
 6607000.0,
 25000.0,
 1500000.0,
 60000.0,
 40000.0,
 84000.0,
 72000.0,
 150000.0,
 800000.0,
 75000.0,
 90000.0,
 5000000.0,
 70000000.0,
 39000.0,
 2000.0,
 1556000.0,
 10000.0,
 4000.0,
 585.0,
 6241.0,
 12000.0,
 125000.0,
 5000.0,
 121200.0,
 3000.0,
 3000000.0,
 230000.0,
 55000.0,
 362000.0]

In [25]:
# Save the merged table. With pandas' default settings the row index is
# written as well, as an unnamed first column of the CSV.
unified_csv_path = "../csv_results/unified_oil_data.csv"
tableau.to_csv(unified_csv_path)

In [29]:
# Show the dtype of every column in the merged table
tableau.dtypes

Period                   datetime64[ns]
Country                          object
Exports (kb/d)                  float64
Imports (kb/d)                  float64
Net_Exports                     float64
Value_Crude                     float64
Val_NGPL                        float64
Val_Others                      float64
Val_Refinery                    float64
conflict_name                    object
Victims                         float64
latitude_x                      float64
longitude_x                     float64
Targe_type                       object
Success                         float64
Kills                           float64
Wounded                         float64
Property_damage_value           float64
latitude_y                      float64
longitude_y                     float64
dtype: object

In [10]:
# Preview the first five rows of the net-exports table
Exportaciones.head(5)

Unnamed: 0,Period,Country,Exports (kb/d),Imports (kb/d),Net_Exports
0,2002-01,United Arab Emirates,1860.0,0.0,1860.0
1,2002-02,United Arab Emirates,1989.0,0.0,1989.0
2,2002-03,United Arab Emirates,1868.0,0.0,1868.0
3,2002-04,United Arab Emirates,1669.0,0.0,1669.0
4,2002-05,United Arab Emirates,1821.0,0.0,1821.0


In [11]:
# Preview the first five rows of the value-variations table
Variaciones_valor.head(5)

Unnamed: 0,Country,Period,Crude,NGPL,Others,Refinery
0,Albania,1993-01,11.26,0.0,0.0,-0.1
1,Albania,1993-02,11.26,0.0,0.0,-0.1
2,Albania,1993-03,11.26,0.0,0.0,-0.1
3,Albania,1993-04,11.26,0.0,0.0,-0.1
4,Albania,1993-05,11.26,0.0,0.0,-0.1


In [12]:
# Preview the first five rows of the geopolitical-incidents table
Incidentes_geopoliticos.head(5)

Unnamed: 0,conflict_name,year,active_year,country,region,best,high,low,latitude,longitude,Start_month,End_month
0,Serbia (Yugoslavia): Kosovo,1999,1,Albania,Europe,4,4,4,42.450278,20.184167,1999-04,1999-04
1,Serbia (Yugoslavia): Kosovo,1999,1,Albania,Europe,2,2,2,42.404167,20.166667,1999-04,1999-04
2,Serbia (Yugoslavia): Kosovo,1999,1,Albania,Europe,0,2,0,42.198056,20.473889,1999-05,1999-05
3,Serbia (Yugoslavia): Kosovo,1999,1,Albania,Europe,0,1,0,42.244444,20.3425,1999-06,1999-06
4,Algeria: Government,1990,0,Algeria,Africa,1,1,1,36.147656,3.691227,1990-12,1990-12


In [13]:
# Preview the first five rows of the terrorism table
Terrorismo.head(5)

Unnamed: 0,Period,Country,Targe_type,Success,Kills,Wounded,Property_damage_value,latitude,longitude
0,1972-01,Kuwait,Business,1,0.0,0.0,,,
1,1972-08,Iran,Business,0,0.0,0.0,,35.724533,51.40519
2,1972-08,Iran,Utilities,0,0.0,0.0,,30.345997,48.287136
3,1973-11,Venezuela,Utilities,1,,,,,
4,1973-12,Argentina,Business,1,0.0,0.0,,-34.61768,-58.444435
