In [None]:
import pandas as pd
import pandas_profiling
import json
from shapely.geometry import Polygon, MultiPolygon
import requests
from shapely.geometry import shape, GeometryCollection
import folium
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Let wide frames show up to 500 columns before pandas truncates the display.
pd.options.display.max_columns = 500

In [None]:
# Read the source table from the CSV that sits next to the notebook.
pax_csv_path = 'pax_data.csv'
df = pd.read_csv(pax_csv_path)

In [None]:
# Column names grouped by the role they play in the analysis.
# Each group is written as one whitespace-separated string and split into a list.
info_relevant_vars = 'Con Dat Contp Agtp Status Stage StageSub'.split()
info_secundary_vars = 'PPName Agt Part Loc1ISO Loc2ISO'.split()

# Columns that each get their own map and chart further down.
filter_candidate_vars = (
    'GCh GRa GRe GInd GRef GeSo '
    'MedGov MedSubs NatRes '
    'LaNom LaCHTa LaCHIt '
    'SsrDrugs SsrCrOcr StDef '
    'CprLife CprTort CprSlav CprFspe CprFmov CprVote'
).split()

# Further columns that are kept in the frame.
filter_other_vars = (
    'StGen StCon StSd StRef StSym StInd StUni StBor StXbor '
    'LaRef HrDem CprReli LaCHPro LaCH HrCp Terr ImRef'
).split()

# Final column selection, in the order the groups are listed above.
vars_to_keep = [
    *info_relevant_vars,
    *info_secundary_vars,
    *filter_candidate_vars,
    *filter_other_vars,
]

# Keep only the selected columns (in `vars_to_keep` order; names absent from
# the CSV become all-NaN columns) and parse `Dat` into datetimes.
df = (
    df.reindex(columns=vars_to_keep)
      .assign(Dat=lambda frame: pd.to_datetime(frame['Dat']))
)

In [None]:
# Build the profiling report for the selected columns and write it to HTML.
report_style = {'full_width': True}
profile = df.profile_report(title='Exploring PAX variables', style=report_style)
profile.to_file(output_file="PAX_profile.html")

In [None]:
# Load country outlines from the GeoJSON-style file and attach them to `df`.
country_geo = 'data_countries.json'

# Decode explicitly as UTF-8: without `encoding`, open() uses the platform
# default, which can mis-decode or reject non-ASCII country names.
with open(country_geo, encoding='utf-8') as json_file:
    data = json.load(json_file)
features = data['features']

# Map each feature's name to its shapely geometry and its id (used as ISO-A3
# code). If two features share a name, the later one wins.
countries = {
    feature['properties']['name']: {
        'geom': shape(feature['geometry']),
        'iso_a3': feature['id'],
    }
    for feature in features
}

# One row per country with columns: country_name, geom, iso_a3.
df_country = (
    pd.DataFrame.from_dict(countries, orient='index')
      .reset_index()
      .rename(columns={'index': 'country_name'})
)

# Left-join on the first location code so every row of `df` is kept.
# Columns left over from a previous run of this cell are dropped first, so
# re-executing the cell does not create `geom_x` / `geom_y` style duplicates.
df = (
    df.drop(columns=list(df_country.columns), errors='ignore')
      .merge(df_country, 'left', left_on='Loc1ISO', right_on='iso_a3')
)

In [None]:
# Column name -> human-readable topic label. The label is used in map legends,
# chart titles and output file names.
filters = dict(
    GCh='CHILDREN',
    GRa='RACIAL, ETHNIC or NATIONAL GROUPS',
    GRe='RELIGIOUS GROUPS',
    GInd='INDIGENOUS PEOPLE',
    GRef='REFUGEES or DISPLACED PERSONS',
    GeSo='SEXUAL ORIENTATION',
    MedGov='GOVERNANCE OF MEDIA',
    MedSubs='MEDIA ROLES',
    NatRes='NATURAL RESOURCES',
    LaNom='NOMADISM RIGHTS',
    LaCHTa='CULTURAL HERITAGE: TANGIBLE',
    LaCHIt='CULTURAL HERITAGE: INTANGIBLE',
    SsrDrugs='DRUGS',
    SsrCrOcr='ORGANISED CRIME',
    StDef='STATE DEFINITION',
    CprLife='LIFE RIGHTS',
    CprTort='TORTURE',
    CprSlav='SLAVERY',
    CprFspe='FREEDOM OF SPEECH',
    CprFmov='FREEDOM OF MOVEMENT',
    CprVote='VOTE AND TAKE PART',
)


# For every topic column in `filters`, write two artefacts named after the
# topic label:
#   <label>.html - folium choropleth of the number of matching rows per `Con`
#   <label>.png  - strip of stars showing when the matching rows are dated

# Apply the seaborn theme once, up front: set_style only affects axes created
# afterwards, so calling it after plt.subplots() left the first chart unstyled.
sns.set_style("whitegrid")

# Geometry lookup, computed once because it does not depend on the filter.
# `df` has one row per agreement, so merging it back as-is would repeat every
# polygon once per agreement; keep a single row per (Con, country code) pair.
geom_lookup = (
    df.reindex(columns=['Con', 'iso_a3', 'geom'])
      .dropna(subset=['geom'])
      .drop_duplicates(subset=['Con', 'iso_a3'])
      .drop(columns='iso_a3')
)

# NOTE(review): the {'init': ...} CRS spelling is deprecated in recent
# pyproj/geopandas releases ('EPSG:4326' is the current form); kept as-is
# because the geopandas version in use is not visible from here.
crs = {'init': 'epsg:4326'}

# Fully transparent overlay: these shapes exist only to carry the tooltips.
style = {'fillColor': '#00000000', 'color': '#00000000'}

for filt, topic in filters.items():
    # Keep every row whose flag is not exactly 0 (missing values are kept too).
    not_in = (df[filt] == 0)
    df_reduced = df[~not_in]


    # MAP

    # Count rows per `Con`. Counting with size() avoids summing the datetime,
    # string and geometry columns, which recent pandas refuses to do.
    df_groupby = (
        df_reduced.groupby('Con')
                  .size()
                  .reset_index(name='count')
                  .merge(geom_lookup, 'left', 'Con')
    )

    gdf_groupby = gpd.GeoDataFrame(df_groupby, crs=crs, geometry='geom')

    # Rows without a matched outline cannot be drawn.
    no_geom = gdf_groupby['geom'].isna()
    gdf_groupby = gdf_groupby[~no_geom]


    hmap = folium.Map(tiles='cartodbpositron', no_touch=True, location=[30, 0], zoom_start=1.55)

    folium.Choropleth(geo_data=gdf_groupby,
                      data=gdf_groupby,
                      columns=['Con', 'count'],
                      key_on='feature.properties.Con',
                      fill_color='OrRd',
                      fill_opacity=1,
                      bins=4,
                      highlight=True,
                      legend_name='Agreements related with '+topic,
                      line_opacity=1).add_to(hmap)

    # Invisible per-shape layer that provides the hover tooltip.
    for label, num, geom in zip(gdf_groupby['Con'], gdf_groupby['count'], gdf_groupby['geom']):
        gs = folium.GeoJson(geom, style_function=lambda x: style,
                            tooltip='<b>'+label+'</b><br/><u>Num of agreements:</u> '+str(num))
        gs.add_to(hmap)

    hmap.save(topic+'.html')


    # SCATTERPLOT...

    df_plot = df_reduced.reindex(columns=['Dat', 'Stage'])

    f, axes = plt.subplots(figsize=(12, 3))

    axes.set_title('Agreements related with '+str(topic)+'\n',
                   color='#636261', size='large', weight="bold")

    sns.despine(ax=axes, left=True)

    sns.scatterplot(data=df_plot, x='Dat', y='Stage', s=250, alpha=0.8, ax=axes, marker="*",
                    color="maroon")

    # The y axis only spreads the markers apart; hide its ticks and label.
    axes.set(yticks=[], ylabel=None)

    f.tight_layout()
    f.savefig(topic+'.png')

    # Render the chart in the notebook, then release it so the figures do not
    # pile up across iterations (matplotlib warns once more than 20 are open).
    plt.show()
    plt.close(f)