In [1]:
import pandas as pd
from collections import defaultdict
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import numpy as np
import pickle

import folium
import branca.colormap as cm
import os
import json
import plot_function
import map_visualization

In [2]:
# Load the tab-separated input tables stored under Data_final/.
resident_foreigners_norm = pd.read_table("Data_final/resident_foreigners_norm.csv", sep="\t")
resident_norm = pd.read_table("Data_final/resident_norm.csv", sep="\t")

# Lookup tables: province -> region ("Provincia"/"Regione") and region -> zone ("Regione"/"Zona").
provincia_regione = pd.read_table("Data_final/regioni.csv", sep="\t")
regione_zona = pd.read_table("Data_final/territori.csv", sep="\t")

# Replace the long / bilingual region labels with shorter, more readable ones
# in both lookup tables so that they keep matching each other.
for lookup_table in (provincia_regione, regione_zona):
    lookup_table["Regione"] = lookup_table["Regione"].replace({
        "Provincia Autonoma Bolzano / Bozen": "Provincia Autonoma di Bolzano",
        "Provincia Autonoma Trento": "Provincia Autonoma di Trento",
        "Valle d'Aosta / Vallée d'Aoste": "Valle d'Aosta",
    })

geo_info = pd.read_table("Data_final/cepii_geo_info.csv", sep="\t")

# Distinct values of the "Year" column, ascending.
years = sorted(set(resident_foreigners_norm["Year"]))

In [3]:
# Region distribution - aggregation over the years:
# {region: sum of "Value" over all rows of the provinces belonging to that region}.
regioni_aggr = {}

for region_name in set(regione_zona["Regione"]):
    in_region = provincia_regione["Regione"] == region_name
    province_totals = []
    for province in provincia_regione.loc[in_region, "Provincia"].values:
        province_rows = resident_foreigners_norm[resident_foreigners_norm["Province"] == province]
        province_totals.append(sum(province_rows["Value"].values))
    regioni_aggr[region_name] = sum(province_totals)

In [4]:
# Region distribution across the years I: absolute yearly totals per region.

def _region_year_totals(population):
    """Return ``{region: {year: total}}`` for one population table.

    ``population`` must expose "Province", "Year" and "Value" columns. For every
    region listed in ``regione_zona`` and every entry of ``years``, the "Value"
    column is summed province by province (provinces come from
    ``provincia_regione``) and the per-province sums are added up.
    """
    totals = {}
    for region_name in set(regione_zona["Regione"]):
        region_provinces = provincia_regione[provincia_regione["Regione"] == region_name]["Provincia"].values
        per_year = {}
        for year in years:
            in_year = population["Year"] == year
            per_year[year] = sum([
                sum(population[(population["Province"] == province) & in_year]["Value"].values)
                for province in region_provinces
            ])
        totals[region_name] = per_year
    return totals


# Absolute values - foreigners
regioni = _region_year_totals(resident_foreigners_norm)

# Absolute values - native
regioni_native = _region_year_totals(resident_norm)

In [5]:
# Region distribution across the years II: growth and native-normalised series.
#
# Growth dictionaries are keyed "<later year>-<earlier year>". The pairs are taken
# from adjacent entries of `years` instead of looking up `y + 1`, so a gap in the
# available years does not raise a KeyError. With consecutive years the keys are
# exactly the same as before.
year_pairs = list(zip(years[:-1], years[1:]))

# Growth - absolute change between two adjacent years
regioni_growth = {}
for r in regioni.keys():
    regioni_growth[r] = {str(nxt) + "-" + str(prev): regioni[r][nxt] - regioni[r][prev]
                         for prev, nxt in year_pairs}

# Relative values - change divided by the previous year value.
# Both operands go through np.float64: a zero base then yields inf/nan (NumPy
# emits a RuntimeWarning) instead of the ZeroDivisionError raised when the totals
# are plain Python zeros, which is what sum() returns for a region without rows.
regioni_prev_growth = {}
for r in regioni.keys():
    regioni_prev_growth[r] = {
        str(nxt) + "-" + str(prev): np.float64(regioni[r][nxt] - regioni[r][prev]) / np.float64(regioni[r][prev])
        for prev, nxt in year_pairs
    }

# Relative values - foreigners divided by the native population of the same year.
# 0/0 is reported as 0 (unchanged convention); any other zero denominator gives
# inf via the np.float64 division rather than crashing.
regioni_norm_native = {}
for r in regioni.keys():
    regioni_norm_native[r] = {
        y: 0 if regioni[r][y] == 0 and regioni_native[r][y] == 0
        else np.float64(regioni[r][y]) / np.float64(regioni_native[r][y])
        for y in years
    }

# Growth of the native-normalised ratio between two adjacent years
regioni_growth_norm_native = {}
for r in regioni_norm_native.keys():
    regioni_growth_norm_native[r] = {
        str(nxt) + "-" + str(prev): regioni_norm_native[r][nxt] - regioni_norm_native[r][prev]
        for prev, nxt in year_pairs
    }

In [6]:
# Keys and values are turned into strings right before each dump; the growth
# dictionaries already have string keys, so only their values are converted.
regioni_temp = {region: {str(year): str(total) for year, total in by_year.items()}
                for region, by_year in regioni.items()}
with open('Statistics/region_abs.json', 'w') as outfile:
    json.dump(regioni_temp, outfile)

regioni_growth_temp = {region: {label: str(delta) for label, delta in by_pair.items()}
                       for region, by_pair in regioni_growth.items()}
with open('Statistics/region_abs_growth.json', 'w') as outfile:
    json.dump(regioni_growth_temp, outfile)

regioni_prev_growth_temp = {region: {label: str(rate) for label, rate in by_pair.items()}
                            for region, by_pair in regioni_prev_growth.items()}
with open('Statistics/region_prev_growth.json', 'w') as outfile:
    json.dump(regioni_prev_growth_temp, outfile)

regioni_norm_native_temp = {region: {str(year): str(ratio) for year, ratio in by_year.items()}
                            for region, by_year in regioni_norm_native.items()}
with open('Statistics/region_native_norm.json', 'w') as outfile:
    json.dump(regioni_norm_native_temp, outfile)

regioni_growth_norm_native_temp = {region: {label: str(delta) for label, delta in by_pair.items()}
                                   for region, by_pair in regioni_growth_norm_native.items()}
with open('Statistics/region_native_norm_growth.json', 'w') as outfile:
    json.dump(regioni_growth_norm_native_temp, outfile)

In [7]:
# Zone distribution across the years: every zone aggregates the regions that
# regione_zona assigns to it.
#
# Growth dictionaries are keyed "<later year>-<earlier year>". The pairs are taken
# from adjacent entries of `years` instead of looking up `y + 1`, so a gap in the
# available years does not raise a KeyError. With consecutive years the keys are
# exactly the same as before.
year_pairs = list(zip(years[:-1], years[1:]))

# Absolute values - foreigners and native
zone = {}
zone_native = {}
for z in list(set(regione_zona["Zona"])):
    zone_regions = regione_zona[regione_zona["Zona"] == z]["Regione"].values
    zone[z] = {y: sum([regioni[t][y] for t in zone_regions]) for y in years}
    zone_native[z] = {y: sum([regioni_native[t][y] for t in zone_regions]) for y in years}

# Growth - absolute change between two adjacent years
zone_growth = {}
for z in zone.keys():
    zone_growth[z] = {str(nxt) + "-" + str(prev): zone[z][nxt] - zone[z][prev]
                      for prev, nxt in year_pairs}

# Relative values - change divided by the previous year value.
# Both operands go through np.float64: a zero base then yields inf/nan (NumPy
# emits a RuntimeWarning) instead of the ZeroDivisionError raised when the totals
# are plain Python zeros, which is what sum() returns for an empty selection.
zone_prev_growth = {}
for z in zone.keys():
    zone_prev_growth[z] = {
        str(nxt) + "-" + str(prev): np.float64(zone[z][nxt] - zone[z][prev]) / np.float64(zone[z][prev])
        for prev, nxt in year_pairs
    }

# Relative values - foreigners divided by the native population of the same year.
# 0/0 is reported as 0 (unchanged convention); any other zero denominator gives
# inf via the np.float64 division rather than crashing.
zone_norm_native = {}
for z in zone.keys():
    zone_norm_native[z] = {
        y: 0 if zone[z][y] == 0 and zone_native[z][y] == 0
        else np.float64(zone[z][y]) / np.float64(zone_native[z][y])
        for y in years
    }

# Growth of the native-normalised ratio between two adjacent years
zone_growth_norm_native = {}
for z in zone_norm_native.keys():
    zone_growth_norm_native[z] = {
        str(nxt) + "-" + str(prev): zone_norm_native[z][nxt] - zone_norm_native[z][prev]
        for prev, nxt in year_pairs
    }

# Foreigners' growth divided by the native population's growth over the same pair
# of years (inf/nan when the native population did not change).
zone_growth_norm_native2 = {}
for z in zone_norm_native.keys():
    zone_growth_norm_native2[z] = {
        str(nxt) + "-" + str(prev): np.float64(zone[z][nxt] - zone[z][prev])
        / np.float64(zone_native[z][nxt] - zone_native[z][prev])
        for prev, nxt in year_pairs
    }

In [8]:
# Keys and values are turned into strings right before each dump; the growth
# dictionaries already have string keys, so only their values are converted.
zone_temp = {area: {str(year): str(total) for year, total in by_year.items()}
             for area, by_year in zone.items()}
with open('Statistics/zone_abs.json', 'w') as outfile:
    json.dump(zone_temp, outfile)

zone_growth_temp = {area: {label: str(delta) for label, delta in by_pair.items()}
                    for area, by_pair in zone_growth.items()}
with open('Statistics/zone_abs_growth.json', 'w') as outfile:
    json.dump(zone_growth_temp, outfile)

zone_prev_growth_temp = {area: {label: str(rate) for label, rate in by_pair.items()}
                         for area, by_pair in zone_prev_growth.items()}
with open('Statistics/zone_prev_growth.json', 'w') as outfile:
    json.dump(zone_prev_growth_temp, outfile)

zone_norm_native_temp = {area: {str(year): str(ratio) for year, ratio in by_year.items()}
                         for area, by_year in zone_norm_native.items()}
with open('Statistics/zone_native_norm.json', 'w') as outfile:
    json.dump(zone_norm_native_temp, outfile)

zone_growth_norm_native_temp = {area: {label: str(delta) for label, delta in by_pair.items()}
                                for area, by_pair in zone_growth_norm_native.items()}
with open('Statistics/zone_native_norm_growth.json', 'w') as outfile:
    json.dump(zone_growth_norm_native_temp, outfile)

In [9]:
# Continent information is stored in the geo_info table.
# Dictionary with structure: {continent: [list of unique iso3 country codes]}.
#
# Group by the bare column name rather than a one-element list: since pandas 2.0,
# iterating a groupby built from a list of length one yields 1-tuple keys such as
# ("Africa",), which would break every later string lookup on this dictionary
# (e.g. continent["Africa"] or color_continent_dict[<key>]).
continent = {name: list(set(codes.values)) for name, codes in geo_info.groupby("continent")["iso3"]}

In [10]:
# Colour assignments for regions, zones and continents, also saved to JSON.
# Regions and zones are paired with colours in alphabetical order of their names;
# zip() stops at the shorter of the two sequences.
color_region_list = [
    "black", "grey", "lightgray", "firebrick", "red",
    "salmon", "chocolate", "saddlebrown", "peachpuff",
    "burlywood", "khaki", "gold", "greenyellow",
    "darkgreen", "g", "springgreen", "aquamarine",
    "cornflowerblue", "navy", "darkorchid", "orchid",
]
color_region_dict = dict(zip(sorted(regioni.keys()), color_region_list))

color_zone_list = ["red", "gold", "greenyellow", "cornflowerblue", "orchid"]
color_zone_dict = dict(zip(sorted(zone.keys()), color_zone_list))

color_continent_dict = {
    "America": "greenyellow",
    "Pacific": "gold",
    "Europe": "red",
    "Asia": "cornflowerblue",
    "Africa": "orchid",
}

for target_path, color_map in (
    ('Statistics/color_region_dict.json', color_region_dict),
    ('Statistics/color_zone_dict.json', color_zone_dict),
    ('Statistics/color_continent_dict.json', color_continent_dict),
):
    with open(target_path, 'w') as outfile:
        json.dump(color_map, outfile)

In [11]:
# Build a palette for every country: the countries of a continent receive
# successive shades of that continent's colour in color_continent_dict,
# assigned in alphabetical order of the country codes.
color_country_dict = {}
countries_list = []
for cont_name in continent.keys():
    continent_stream = sorted(continent[cont_name])
    palette = sns.light_palette(color_continent_dict[cont_name], len(continent_stream))
    # Walk codes and shades together instead of calling list.index() for every
    # country, and use distinct names so nothing shadows the country variable.
    for iso3, shade in zip(continent_stream, palette):
        # Each colour component is stored as a string so the entry can be dumped to JSON.
        color_country_dict[iso3] = [str(channel) for channel in shade]
        countries_list.append(iso3)

with open('Statistics/color_country_dict.json', 'w') as outfile:
    json.dump(color_country_dict, outfile)

# Countries list in the same order as above (grouped by continent).
# NOTE: despite the .txt extension this file is a binary pickle.
with open('Statistics/countries_list.txt', 'wb') as fp:
    pickle.dump(countries_list, fp)

In [16]:
# One named-colour palette per continent. Countries are paired with colours in
# alphabetical order of their codes; zip() stops at the shorter sequence.

# Africa: 53 colours
Africa_palette_list = [
    "black", "grey", "lightgray", "whitesmoke", "rosybrown", "lightcoral", "indianred", "brown",
    "maroon", "red", "mistyrose", "darksalmon", "coral", "lightsalmon", "sienna", "chocolate",
    "sandybrown", "peachpuff", "peru", "darkorange", "burlywood", "goldenrod", "gold", "khaki",
    "darkkhaki", "olive", "y", "olivedrab", "yellowgreen", "darkolivegreen", "chartreuse",
    "darkseagreen", "darkgreen", "green", "mediumseagreen", "springgreen", "turquoise",
    "paleturquoise", "cyan", "deepskyblue", "lightskyblue", "cornflowerblue", "royalblue",
    "midnightblue", "blue", "slateblue", "mediumpurple", "rebeccapurple", "indigo", "m",
    "fuchsia", "mediumvioletred", "palevioletred",
]
Africa_palette = dict(zip(sorted(continent["Africa"]), Africa_palette_list))
with open('Statistics/Africa_palette.json', 'w') as outfile:
    json.dump(Africa_palette, outfile)

# Asia: 49 colours
Asia_palette_list = [
    "black", "grey", "lightgray", "whitesmoke", "rosybrown", "lightcoral", "indianred", "brown",
    "maroon", "red", "mistyrose", "coral", "lightsalmon", "sienna", "chocolate", "sandybrown",
    "peachpuff", "peru", "darkorange", "burlywood", "goldenrod", "gold", "khaki", "darkkhaki",
    "olive", "y", "olivedrab", "yellowgreen", "darkolivegreen", "chartreuse", "darkseagreen",
    "darkgreen", "green", "mediumseagreen", "turquoise", "paleturquoise", "cyan", "deepskyblue",
    "lightskyblue", "cornflowerblue", "midnightblue", "blue", "mediumpurple", "rebeccapurple",
    "indigo", "m", "fuchsia", "mediumvioletred", "palevioletred",
]
Asia_palette = dict(zip(sorted(continent["Asia"]), Asia_palette_list))
with open('Statistics/Asia_palette.json', 'w') as outfile:
    json.dump(Asia_palette, outfile)

# Europe: 44 colours
Europe_palette_list = [
    "black", "grey", "lightgray", "whitesmoke", "rosybrown", "lightcoral", "indianred", "brown",
    "maroon", "red", "mistyrose", "coral", "lightsalmon", "sienna", "chocolate", "sandybrown",
    "peachpuff", "peru", "burlywood", "goldenrod", "gold", "khaki", "y", "olivedrab", "yellowgreen",
    "darkolivegreen", "chartreuse", "darkseagreen", "darkgreen", "green", "mediumseagreen",
    "turquoise", "paleturquoise", "deepskyblue", "lightskyblue", "cornflowerblue", "midnightblue",
    "blue", "mediumpurple", "rebeccapurple", "m", "fuchsia", "mediumvioletred", "palevioletred",
]
Europe_palette = dict(zip(sorted(continent["Europe"]), Europe_palette_list))
with open('Statistics/Europe_palette.json', 'w') as outfile:
    json.dump(Europe_palette, outfile)

# America: 35 colours
America_palette_list = [
    "black", "grey", "lightgray", "whitesmoke", "lightcoral", "indianred", "brown", "maroon", "red",
    "coral", "lightsalmon", "sienna", "chocolate", "sandybrown", "peachpuff", "burlywood",
    "goldenrod", "gold", "khaki", "y", "olivedrab", "yellowgreen", "darkolivegreen", "green",
    "mediumseagreen", "turquoise", "paleturquoise", "deepskyblue", "lightskyblue",
    "cornflowerblue", "midnightblue", "blue", "mediumpurple", "m", "fuchsia",
]
America_palette = dict(zip(sorted(continent["America"]), America_palette_list))
with open('Statistics/America_palette.json', 'w') as outfile:
    json.dump(America_palette, outfile)

# Pacific: 14 colours
Pacific_palette_list = [
    "black", "grey", "lightgray", "indianred", "maroon", "lightsalmon", "gold", "yellowgreen",
    "green", "paleturquoise", "deepskyblue", "blue", "mediumpurple", "fuchsia",
]
Pacific_palette = dict(zip(sorted(continent["Pacific"]), Pacific_palette_list))
with open('Statistics/Pacific_palette.json', 'w') as outfile:
    json.dump(Pacific_palette, outfile)

In [12]:
# Yearly totals of "Value" split by gender: {"male": [...], "female": [...]},
# each list following the order of `years`.
gender_dict = defaultdict(list)
for gender in ("male", "female"):
    of_gender = resident_foreigners_norm["Gender"] == gender
    gender_dict[gender] = [
        sum(resident_foreigners_norm[of_gender & (resident_foreigners_norm["Year"] == year)]["Value"])
        for year in years
    ]

In [13]:
# Gender comparison: stringify the yearly totals and save them as JSON.
gender_temp = {gender: list(map(str, totals)) for gender, totals in gender_dict.items()}
with open('Statistics/gender_distribution.json', 'w') as outfile:
    json.dump(gender_temp, outfile)

In [14]:
# Province by origin country:
# {province: {country: [total per year, ordered like `years`]}}, zero where no rows exist.
temp = {
    key: sum(values.values)
    for key, values in resident_foreigners_norm.groupby(["Province", "Year", "Country"])["Value"]
}

all_c = list(set(resident_foreigners_norm["Country"]))
all_p = list(set(resident_foreigners_norm["Province"]))

py_prov_country = {}
for province in all_p:
    py_prov_country[province] = {country: [0] * len(years) for country in all_c}

# Group keys are (province, year, country) tuples.
for (province, year, country), total in temp.items():
    py_prov_country[province][country][years.index(year)] = total

In [15]:
# Stringify every yearly value, then save the province/country table as JSON.
py_prov_country_temp = {
    province: {country: list(map(str, series)) for country, series in by_country.items()}
    for province, by_country in py_prov_country.items()
}
with open('Statistics/py_prov_country.json', 'w') as outfile:
    json.dump(py_prov_country_temp, outfile)

In [16]:
# Region aggregation: add each province's yearly series to the region it belongs to.
all_r = list(set(provincia_regione["Regione"]))
py_region_country = {region: {country: [0] * len(years) for country in all_c} for region in all_r}

for province, by_country in py_prov_country.items():
    # First matching row of the province -> region lookup table.
    owning_region = provincia_regione.loc[provincia_regione["Provincia"] == province, "Regione"].values[0]
    region_series = py_region_country[owning_region]
    for country, yearly in by_country.items():
        region_series[country] = [
            running + extra for running, extra in zip(region_series[country], np.array(yearly))
        ]

In [17]:
# Stringify every yearly value, then save the region/country table as JSON.
py_region_country_temp = {
    region: {country: list(map(str, series)) for country, series in by_country.items()}
    for region, by_country in py_region_country.items()
}
with open('Statistics/py_region_country.json', 'w') as outfile:
    json.dump(py_region_country_temp, outfile)

In [18]:
# Zone aggregation: add each region's yearly series to the zone it belongs to.
all_z = list(set(regione_zona["Zona"]))
py_zone_country = {area: {country: [0] * len(years) for country in all_c} for area in all_z}

for region, by_country in py_region_country.items():
    # First matching row of the region -> zone lookup table.
    owning_zone = regione_zona.loc[regione_zona["Regione"] == region, "Zona"].values[0]
    zone_series = py_zone_country[owning_zone]
    for country, yearly in by_country.items():
        zone_series[country] = [
            running + extra for running, extra in zip(zone_series[country], np.array(yearly))
        ]

In [19]:
# Stringify every yearly value, then save the zone/country table as JSON.
py_zone_country_temp = {
    area: {country: list(map(str, series)) for country, series in by_country.items()}
    for area, by_country in py_zone_country.items()
}
with open('Statistics/py_zone_country.json', 'w') as outfile:
    json.dump(py_zone_country_temp, outfile)

In [20]:
# Province by origin continent:
# {province: {continent: [total per year, ordered like `years`]}}.
all_cont = list(continent.keys())
py_prov_continent = {p: {c: [0 for y in years] for c in all_cont} for p in py_prov_country.keys()}

# Resolve every country's continent once, instead of scanning all continent lists
# for each (province, country) pair. setdefault keeps the first continent that
# lists a code, matching the previous first-match behaviour; a country missing
# from every continent now fails with a KeyError naming that country.
country_to_continent = {}
for cont, members in continent.items():
    for iso3 in members:
        country_to_continent.setdefault(iso3, cont)

for p, d in py_prov_country.items():
    for c, v in d.items():
        cont = country_to_continent[c]
        py_prov_continent[p][cont] = [i + j for i, j in zip(py_prov_continent[p][cont], np.array(v))]

In [21]:
# Stringify every yearly value, then save the province/continent table as JSON.
py_prov_continent_temp = {
    province: {cont_name: list(map(str, series)) for cont_name, series in by_continent.items()}
    for province, by_continent in py_prov_continent.items()
}
with open('Statistics/py_prov_continent.json', 'w') as outfile:
    json.dump(py_prov_continent_temp, outfile)

In [22]:
# Region aggregation by origin continent:
# {region: {continent: [total per year, ordered like `years`]}}.
py_region_continent = {p: {c: [0 for y in years] for c in all_cont} for p in py_region_country.keys()}

# Resolve every country's continent once, instead of scanning all continent lists
# for each (region, country) pair. setdefault keeps the first continent that
# lists a code, matching the previous first-match behaviour; a country missing
# from every continent now fails with a KeyError naming that country.
country_to_continent = {}
for cont, members in continent.items():
    for iso3 in members:
        country_to_continent.setdefault(iso3, cont)

for p, d in py_region_country.items():
    for c, v in d.items():
        cont = country_to_continent[c]
        py_region_continent[p][cont] = [i + j for i, j in zip(py_region_continent[p][cont], np.array(v))]

In [23]:
# Stringify every yearly value, then save the region/continent table as JSON.
py_region_continent_temp = {
    region: {cont_name: list(map(str, series)) for cont_name, series in by_continent.items()}
    for region, by_continent in py_region_continent.items()
}
with open('Statistics/py_region_continent.json', 'w') as outfile:
    json.dump(py_region_continent_temp, outfile)

In [24]:
# Zone aggregation by origin continent:
# {zone: {continent: [total per year, ordered like `years`]}}.
py_zone_continent = {p: {c: [0 for y in years] for c in all_cont} for p in py_zone_country.keys()}

# Resolve every country's continent once, instead of scanning all continent lists
# for each (zone, country) pair. setdefault keeps the first continent that
# lists a code, matching the previous first-match behaviour; a country missing
# from every continent now fails with a KeyError naming that country.
country_to_continent = {}
for cont, members in continent.items():
    for iso3 in members:
        country_to_continent.setdefault(iso3, cont)

for p, d in py_zone_country.items():
    for c, v in d.items():
        cont = country_to_continent[c]
        py_zone_continent[p][cont] = [i + j for i, j in zip(py_zone_continent[p][cont], np.array(v))]

In [25]:
# Stringify every yearly value, then save the zone/continent table as JSON.
py_zone_continent_temp = {
    area: {cont_name: list(map(str, series)) for cont_name, series in by_continent.items()}
    for area, by_continent in py_zone_continent.items()
}
with open('Statistics/py_zone_continent.json', 'w') as outfile:
    json.dump(py_zone_continent_temp, outfile)

In [27]:
# Top 10 / top 5 countries of each continent, ranked by their total summed over
# every year in `years`.
# Province aggregation

py_prov_top10_countrie_cont = {p: {} for p in all_p}
py_prov_top5_countrie_cont = {p: {} for p in all_p}

for province, by_country in py_prov_country.items():
    for cont_countries in continent.values():
        # Ascending by (total, country code); the largest totals sit at the end.
        ranked = [iso3 for _, iso3 in sorted((sum(by_country[iso3]), iso3) for iso3 in cont_countries)]
        for iso3 in ranked[-10:]:
            py_prov_top10_countrie_cont[province][iso3] = by_country[iso3]
        for iso3 in ranked[-5:]:
            py_prov_top5_countrie_cont[province][iso3] = by_country[iso3]

In [28]:
# Stringify every yearly value, then save the province top-10 and top-5 tables as JSON.
py_prov_top10_countrie_cont_temp = {
    province: {country: list(map(str, series)) for country, series in by_country.items()}
    for province, by_country in py_prov_top10_countrie_cont.items()
}
with open('Statistics/py_prov_top10_countrie_cont.json', 'w') as outfile:
    json.dump(py_prov_top10_countrie_cont_temp, outfile)

py_prov_top5_countrie_cont_temp = {
    province: {country: list(map(str, series)) for country, series in by_country.items()}
    for province, by_country in py_prov_top5_countrie_cont.items()
}
with open('Statistics/py_prov_top5_countrie_cont.json', 'w') as outfile:
    json.dump(py_prov_top5_countrie_cont_temp, outfile)

In [29]:
# Region aggregation: top 10 / top 5 countries of each continent, ranked by
# their total summed over every year in `years`.

py_region_top10_countrie_cont = {r: {} for r in all_r}
py_region_top5_countrie_cont = {r: {} for r in all_r}

for region, by_country in py_region_country.items():
    for cont_countries in continent.values():
        # Ascending by (total, country code); the largest totals sit at the end.
        ranked = [iso3 for _, iso3 in sorted((sum(by_country[iso3]), iso3) for iso3 in cont_countries)]
        for iso3 in ranked[-10:]:
            py_region_top10_countrie_cont[region][iso3] = by_country[iso3]
        for iso3 in ranked[-5:]:
            py_region_top5_countrie_cont[region][iso3] = by_country[iso3]

In [30]:
# Stringify every yearly value, then save the region top-10 and top-5 tables as JSON.
py_region_top10_countrie_cont_temp = {
    region: {country: list(map(str, series)) for country, series in by_country.items()}
    for region, by_country in py_region_top10_countrie_cont.items()
}
with open('Statistics/py_region_top10_countrie_cont.json', 'w') as outfile:
    json.dump(py_region_top10_countrie_cont_temp, outfile)

py_region_top5_countrie_cont_temp = {
    region: {country: list(map(str, series)) for country, series in by_country.items()}
    for region, by_country in py_region_top5_countrie_cont.items()
}
with open('Statistics/py_region_top5_countrie_cont.json', 'w') as outfile:
    json.dump(py_region_top5_countrie_cont_temp, outfile)

In [31]:
# Zone aggregation: top 10 / top 5 countries of each continent, ranked by
# their total summed over every year in `years`.

py_zone_top10_countrie_cont = {z: {} for z in all_z}
py_zone_top5_countrie_cont = {z: {} for z in all_z}

for area, by_country in py_zone_country.items():
    for cont_countries in continent.values():
        # Ascending by (total, country code); the largest totals sit at the end.
        ranked = [iso3 for _, iso3 in sorted((sum(by_country[iso3]), iso3) for iso3 in cont_countries)]
        for iso3 in ranked[-10:]:
            py_zone_top10_countrie_cont[area][iso3] = by_country[iso3]
        for iso3 in ranked[-5:]:
            py_zone_top5_countrie_cont[area][iso3] = by_country[iso3]

In [32]:
# Stringify every yearly value, then save the zone top-10 and top-5 tables as JSON.
py_zone_top10_countrie_cont_temp = {
    area: {country: list(map(str, series)) for country, series in by_country.items()}
    for area, by_country in py_zone_top10_countrie_cont.items()
}
with open('Statistics/py_zone_top10_countrie_cont.json', 'w') as outfile:
    json.dump(py_zone_top10_countrie_cont_temp, outfile)

py_zone_top5_countrie_cont_temp = {
    area: {country: list(map(str, series)) for country, series in by_country.items()}
    for area, by_country in py_zone_top5_countrie_cont.items()
}
with open('Statistics/py_zone_top5_countrie_cont.json', 'w') as outfile:
    json.dump(py_zone_top5_countrie_cont_temp, outfile)

In [77]:
# Countries distribution for each continent
# Province aggregation: {continent: {province: {country: yearly series}}},
# restricted to the countries listed for that continent.
py_prov_cont_countries = {
    cont_name: {
        province: {iso3: py_prov_country[province][iso3] for iso3 in continent[cont_name]}
        for province in all_p
    }
    for cont_name in all_cont
}

In [83]:
# One JSON file per continent with the stringified province/country series.
for cont in all_cont:
    serialisable = {
        province: {iso3: list(map(str, series)) for iso3, series in by_country.items()}
        for province, by_country in py_prov_cont_countries[cont].items()
    }
    with open('Statistics/py_prov_'+cont+'_countries.json', 'w') as outfile:
        json.dump(serialisable, outfile)

In [78]:
# Region aggregation: {continent: {region: {country: yearly series}}},
# restricted to the countries listed for that continent.
py_region_cont_countries = {
    cont_name: {
        region: {iso3: py_region_country[region][iso3] for iso3 in continent[cont_name]}
        for region in all_r
    }
    for cont_name in all_cont
}

In [84]:
# One JSON file per continent with the stringified region/country series.
for cont in all_cont:
    serialisable = {
        region: {iso3: list(map(str, series)) for iso3, series in by_country.items()}
        for region, by_country in py_region_cont_countries[cont].items()
    }
    with open('Statistics/py_region_'+cont+'_countries.json', 'w') as outfile:
        json.dump(serialisable, outfile)

In [79]:
# Zone aggregation: {continent: {zone: {country: yearly series}}},
# restricted to the countries listed for that continent.
py_zone_cont_countries = {
    cont_name: {
        area: {iso3: py_zone_country[area][iso3] for iso3 in continent[cont_name]}
        for area in all_z
    }
    for cont_name in all_cont
}

In [85]:
# One JSON file per continent with the stringified zone/country series.
for cont in all_cont:
    serialisable = {
        area: {iso3: list(map(str, series)) for iso3, series in by_country.items()}
        for area, by_country in py_zone_cont_countries[cont].items()
    }
    with open('Statistics/py_zone_'+cont+'_countries.json', 'w') as outfile:
        json.dump(serialisable, outfile)

In [None]:
# Print one Markdown table row per region (alphabetical order): the region name
# followed by its totals for the first nine entries of `years`.
# Values are read through `years` rather than by fixed positions j[0]..j[8], so
# the cell neither depends on dict ordering nor raises IndexError when fewer
# than nine years are available.
for region_name in sorted(regioni.keys()):
    cells = [region_name] + [regioni[region_name][y] for y in years[:9]]
    print("| " + " | ".join(str(cell) for cell in cells) + " | ")

In [None]:
# Print one Markdown table row per region (alphabetical order): the region name
# followed by its totals for every year after the ninth entry of `years`.
# Values are read through `years` rather than by fixed positions j[9]..j[14], so
# the cell does not raise IndexError with fewer than fifteen years and does not
# silently drop years beyond the fifteenth.
for region_name in sorted(regioni.keys()):
    cells = [region_name] + [regioni[region_name][y] for y in years[9:]]
    print("| " + " | ".join(str(cell) for cell in cells) + " |")