## GEOGRAPHICAL FEATURE VISUALIZATION

In [1]:
import folium
import geopandas as gpd
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from folium import Choropleth , Circle , Marker , Icon ,Map

from folium.plugins import HeatMap, MarkerCluster

### DATA WRANGLING

- Let's import the original data

In [2]:
# NOTE(review): absolute, machine-specific location of the raw claims CSV.
claims_csv_path = '/home/julian/Cursos/Ironhack/Proyectos/ProyectoFinal/Claims-Frequency-Predictions/freMTPL2freq.csv'
claimsdf = pd.read_csv(claims_csv_path)

In [3]:
# Row-by-row ratio of ClaimNb to Exposure.
claimsdf['empirical_frequencies'] = claimsdf['ClaimNb'] / claimsdf['Exposure']

In [4]:
def claims_(x):
    """
    Cap a claim count at four.

    arg: x, a claim count
    returns: 4 when x is greater than 4; otherwise x itself, unchanged
    """
    return 4 if x > 4 else x

In [5]:
# Cap every claim count with claims_ (values above four become four).
claimsdf['ClaimNb'] = claimsdf['ClaimNb'].apply(claims_)

In [6]:
# Region names and the codes that stand for them in the Region column; the two
# lists are paired by position.
regions = ['Île_de_France', 'Champagne_Ardenne', 'Picardie', 'Haute_Normandie', 'Centre', 'Basse_Normandie', 'Bourgogne', 'Nord_Pas-de-Calais', 'Lorraine', 'Alsace', 'Franche_Comté', 'Pays_de_la_Loire', 'Bretagne', 'Poitou_Charentes', 'Aquitaine', 'Midi_Pyrénées', 'Limousin', 'Rhône_Alpes', 'Auvergne', 'Languedoc_Roussillon', 'Provence-Alpes_Côte d´Azur', 'Corse']
regions_code = ['R11', 'R21', 'R22', 'R23', 'R24', 'R25', 'R26', 'R31', 'R41', 'R42', 'R43', 'R52', 'R53', 'R54', 'R72', 'R73', 'R74', 'R82', 'R83', 'R91', 'R93', 'R94']

# Translate codes to names with one exact-match dictionary lookup per row,
# instead of one substring-replacement pass over the column per code (whose
# pattern handling depends on the pandas version's `regex` default).
# Values that are not a known code (e.g. names produced by an earlier run of
# this cell) are kept as they are, so the cell can be re-run safely.
region_name_by_code = dict(zip(regions_code, regions))
claimsdf['Region'] = claimsdf['Region'].map(region_name_by_code).fillna(claimsdf['Region'])

In [7]:
# Lookup from the region names held in the Region column (keys) to the names
# they are replaced with (values). Several keys share one value, e.g.
# 'Haute_Normandie' and 'Basse_Normandie' both become 'Normandie'; 'Bretagne'
# and 'Corse' map to themselves.
# The insertion order matters: a later cell iterates new_regions.values() to
# decide the row order of the regrouped data.
new_regions = {'Île_de_France': 'Île-de-France',
               'Centre': 'Centre-Val de Loire',
               'Bourgogne': 'Bourgogne-Franche-Comté',
               'Franche_Comté': 'Bourgogne-Franche-Comté',
               'Haute_Normandie': 'Normandie',
               'Basse_Normandie': 'Normandie',
               'Nord_Pas-de-Calais': 'Hauts-de-France',
               'Picardie': 'Hauts-de-France',
               'Champagne_Ardenne': 'Grand Est',
               'Lorraine': 'Grand Est',
               'Alsace': 'Grand Est',
               'Pays_de_la_Loire': 'Pays de la Loire',
               'Bretagne': 'Bretagne', 
               'Aquitaine': 'Nouvelle-Aquitaine',
               'Limousin': 'Nouvelle-Aquitaine',
               'Poitou_Charentes': 'Nouvelle-Aquitaine',               
               'Languedoc_Roussillon': 'Occitanie',
               'Midi_Pyrénées': 'Occitanie',                             
               'Rhône_Alpes': 'Auvergne-Rhône-Alpes',
               'Auvergne': 'Auvergne-Rhône-Alpes',
               'Provence-Alpes_Côte d´Azur':"Provence-Alpes-Côte d'Azur",              
               'Corse': 'Corse'}

In [8]:
# Translate each region name through new_regions.
# A plain Series.map turns every name missing from the dict into NaN, and the
# later groupby on Region silently leaves NaN keys out. Fail loudly instead.
# Names that are already translated (e.g. when this cell is run a second time)
# are accepted and left untouched; missing values stay missing.
accepted_names = set(new_regions) | set(new_regions.values())
unmapped_names = set(claimsdf['Region'].dropna().unique()) - accepted_names
if unmapped_names:
    raise ValueError(f"Region names missing from new_regions: {sorted(unmapped_names)}")
claimsdf['Region'] = claimsdf['Region'].map(new_regions).fillna(claimsdf['Region'])

In [9]:
# Natural logarithm of the Density column, stored as a new column.
claimsdf['log_density'] = np.log(claimsdf['Density'])

In [10]:
# Remove the IDpol and Density columns from the working frame.
claimsdf = claimsdf.drop(columns=['IDpol', 'Density'])

- Now, we'll group our policies by Region and take the mean of each continuous variable

In [74]:
# Per-region mean of each of the columns listed here.
mean_columns = ['ClaimNb', 'Exposure', 'BonusMalus', 'empirical_frequencies',
                'VehPower', 'VehAge', 'DrivAge', 'log_density']
grouped = claimsdf.groupby('Region').agg({column: 'mean' for column in mean_columns})

In [75]:
# Move the Region index back into an ordinary column.
grouped = grouped.reset_index()

- Let's reorganize our data frame in a way that matches our geo json

In [77]:
# One dict per row of grouped, keyed by column name.
prueba = grouped.to_dict(orient="records")

In [78]:
# Collect the per-region records in the order in which their names occur among
# the values of new_regions. A name that occurs several times there
# contributes its record once per occurrence.
nueva = [
    registro
    for nombre in new_regions.values()
    for registro in prueba
    if "Region" in registro and registro["Region"] == nombre
]

In [79]:
# Rebuild a data frame from the reordered list of records.
final_grouped = pd.DataFrame(data=nueva)

In [80]:
# Keep only the first copy of each repeated row; the surviving rows keep their
# original index labels.
final_grouped = final_grouped.drop_duplicates()

In [81]:
# Preview the first three rows.
final_grouped.iloc[:3]

Unnamed: 0,Region,ClaimNb,Exposure,BonusMalus,empirical_frequencies,VehPower,VehAge,DrivAge,log_density
0,Île-de-France,0.056999,0.432844,62.521772,0.413105,6.722758,4.801393,44.624293,8.106634
1,Centre-Val de Loire,0.057266,0.639553,58.835617,0.209384,6.332819,8.95246,46.137315,4.810377
2,Bourgogne-Franche-Comté,0.048062,0.472954,60.914029,0.331357,6.451345,6.042647,44.590878,4.963646


### MAPPING

In [62]:
# `os` is not needed by this cell itself; the import is kept in case other
# cells rely on it.
import os

# Location of the regions GeoJSON, relative to the working directory.
geo_json = "../../regions.geojson"

# Read the file explicitly as UTF-8. Without an explicit encoding, open() falls
# back to the platform default, which can mis-decode accented region names.
with open(geo_json, encoding="utf-8") as geo_file:
    geo_france = json.load(geo_file)

In [None]:
# Disabled debugging aid: the loop below is wrapped in a string literal, so
# nothing in it is executed. Un-quoted, it would print the "properties" entry
# of every feature in geo_france.
"""for i in range(len(geo_france.get("features"))):
    print(geo_france.get("features")[i].get("properties"))"""

In [89]:
# Base map: centre coordinates as [latitude, longitude] and the initial zoom.
map_centre = [46.72213865950384, 2.6911389975442024]
france_regions_map = folium.Map(location=map_centre, zoom_start=6)

In [90]:
# Shade each region of the GeoJSON by the log_density value of the matching
# row in final_grouped; rows are matched to features through the "nom"
# property of each feature.
# Built with the folium.Choropleth class and attached via add_to(), because
# the Map.choropleth method is deprecated and absent from newer folium releases.
# The result is assigned so the cell does not echo the layer's repr.
density_layer = folium.Choropleth(
    geo_data=geo_json,
    data=final_grouped,
    columns=["Region", "log_density"],
    key_on="feature.properties.nom",
    fill_color="YlGnBu",
    fill_opacity=1,
    line_opacity=1,
    # NOTE(review): the colour scale shows log_density, so "Clusters" looks
    # like a leftover label - confirm the intended legend text.
    legend_name="Clusters",
    smooth_factor=0,
).add_to(france_regions_map)


In [91]:
# Bare expression as the last line of the cell: the notebook renders the map.
france_regions_map

In [70]:
# Map.save writes the map as an HTML document, so the file gets an .html
# extension; under a .png name the output is not a valid image file.
france_regions_map.save("choro.html")

#### Now, we proceed to export a csv file with the wrangled data frame for mapping

In [None]:
# Export the per-region frame used for the map, matching the output file name.
# NOTE(review): this cell used to write the policy-level claimsdf into
# 'final_grouped.csv'; confirm that the grouped frame is the intended export.
final_grouped.to_csv('final_grouped.csv', index=False)