In [1]:
import pandas as pd 
from unidecode import unidecode
import plotly.express as px

In [2]:
# Load the enriched sale listings; the path is relative to the notebook's
# working directory.
df_data = pd.read_csv(filepath_or_buffer='../data/sale_data/2_sale_enriched.csv')

# Preview the first two rows as the cell's rich output.
df_data.head(n=2)

Unnamed: 0,citta,bagni,stanze,piano,regione,m2,quartiere,via,disponibilità,prezzo,...,emb_248,emb_249,emb_250,emb_251,emb_252,emb_253,emb_254,emb_255,log_prezzo,log_m2
0,rosignano marittimo,1,4,1,toscana,100,Piazza Pietro Gori,missing,Libero,123000.0,...,0.26779,0.836333,-0.325505,0.032583,-0.053504,0.501264,0.229546,0.440637,11.71994,4.60517
1,scandicci,2,4,2,toscana,80,Via Dante,missing,Libero,260000.0,...,0.342138,0.705374,0.029907,-0.156651,0.231911,0.086781,-0.081463,0.15017,12.468437,4.382027


In [3]:
# Replacement table for specific city names. Keys are the lower-case spellings
# to be replaced; values are the names returned in their place.
# NOTE(review): presumably the replacements are the spellings under which
# coordinates can be found for these places — confirm against the geo data.
fix_citta_dict = {
    'barberino tavarnelle': "Barberino Val d'Elsa",
    'montemagno monferrato': 'Montemagno',
    'borgo veneto': 'Montagnana',
    'pieve del grappa': 'Borso del Grappa',
    'laterina pergine valdarno': 'Laterina',
    'rio': 'Portoferraio',
    'calatafimi segesta': 'Calatafimi-Segesta',
    'giardini naxos': 'Giardini-Naxos',
    'negrar di valpolicella': 'Marano di Valpolicella',
    'bolzano/bozen': 'Bolzano',
    'appiano sulla strada del vino/eppan an der weinstrasse': 'Appiano sulla strada del vino',
    'laives/leifers': 'Laives',
    'merano/meran': 'Merano',
    'vermezzo con zelo': 'Vermezzo',
    'borgo valbelluna': 'Trichiana',
    'colceresa': 'Pianezze',
    'barbarano mossano': 'Mossano',
    'sorbolo mezzani': 'Sorbolo',
    'lusiana conco': 'Lusiana',
}


def fix_citta(city):
    """Return the replacement name for ``city``, or ``city`` itself.

    The lookup is an exact match against the keys of ``fix_citta_dict``
    (no case folding or trimming is done here), so any value that is not a
    key is handed back unchanged.
    """
    return fix_citta_dict.get(city, city)

In [4]:
# Build the coordinates lookup table in one chain:
#   1. read the JSON and discard its last two rows
#      (NOTE(review): presumably trailing non-municipality records — confirm);
#   2. normalise the municipality name: transliterate to ASCII with unidecode,
#      strip apostrophes, lower-case;
#   3. rename 'comune' -> 'citta' and 'lng' -> 'long'.
lat_long_df = (
    pd.read_json('../data/feature_data/italy_geo.json')
    .iloc[:-2, :]
    .assign(comune=lambda geo: geo['comune'].map(unidecode).str.replace("'","").str.lower())
    .rename(columns={'comune': 'citta', 'lng': 'long'})
)

# Preview the first two rows as the cell's rich output.
lat_long_df.head(2)

Unnamed: 0,istat,citta,long,lat
0,1001,aglie,7.7686,45.363433
1,1002,airasca,7.48443104,44.916886


In [5]:
# Per-city median price and median surface, joined with coordinates.
#   - city names go through fix_citta, then lose apostrophes and are lower-cased;
#   - rows whose city is the literal 'missing' are dropped;
#   - the merge uses the default inner join, so cities without a match in
#     lat_long_df do not appear in the result;
#   - 'long' is cast to float at the end.
# NOTE(review): unlike lat_long_df, the listing city names are not passed
# through unidecode here — presumably they are already ASCII; confirm.
df_cities = (
    df_data[['citta', 'prezzo', 'm2']]
    .assign(citta=lambda listings: listings['citta'].map(fix_citta).str.replace("'","").str.lower())
    .loc[lambda listings: listings['citta'] != 'missing']
    .groupby('citta')
    .median()
    .reset_index()
    .rename(columns={'prezzo': 'median_price', 'm2': 'median_m2'})
    .merge(lat_long_df[['citta', 'lat', 'long']], on='citta')
    .astype({'long': float})
)

In [6]:
# One marker per row of df_cities: colour encodes the median price and marker
# size encodes the median surface.
fig = px.scatter_mapbox(
    df_cities,
    lat="lat",
    lon="long",
    color="median_price",
    size='median_m2',
    zoom=5,
    height=800,
    width=1400,
)

# Use the OpenStreetMap base layer and remove all outer margins.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r":0,"t":0,"l":0,"b":0},
)
fig.show()