# Load communes

In [1]:
import json
import pandas as pd
import unicodedata

def load_data(path='data/paris/communes.json'):
    """Load the communes from a JSON Lines file.

    Args:
        path: Location of the file; every non-blank line must hold one JSON
            document. Defaults to the Paris communes file used by this notebook.

    Returns:
        list: The decoded JSON documents, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Explicit UTF-8: names may contain accented characters (they are stripped
    # later when building URLs) and the platform default encoding is not UTF-8
    # everywhere.
    with open(path, encoding='utf-8') as f:
        # Blank lines (typically a trailing newline) carry no record.
        return [json.loads(line) for line in f if line.strip()]

def format_url_ville_data(feature, kind="log"):
    """Build the ville-data.com page URL for one commune.

    Args:
        feature: Mapping-like object exposing 'name' and 'id'.
        kind: "log" for the housing page, "pop" for the inhabitants page.

    Returns:
        str: ``https://ville-data.com/<section>/<slug>-75-<id>``.

    Raises:
        KeyError: If ``kind`` is not one of the supported keys, or if
            ``feature`` lacks 'name' or 'id'.
    """
    # URL section for each kind of statistic
    sections = {"log": "logement", "pop": "nombre-d-habitants"}

    raw_name = feature['name']
    code = feature['id']

    # Strip accents: decompose, then drop the combining marks (category 'Mn')
    decomposed = unicodedata.normalize('NFD', raw_name)
    slug = ''.join([ch for ch in decomposed if unicodedata.category(ch) != 'Mn'])

    # Spaces and apostrophes become hyphens, the "œ" ligature is spelled out
    for old, new in ((" ", "-"), ("'", "-"), ("œ", "oe")):
        slug = slug.replace(old, new)

    # Anything that is neither alphanumeric nor a hyphen is dropped
    slug = ''.join(filter(lambda ch: ch == '-' or ch.isalnum(), slug))

    return f"https://ville-data.com/{sections[kind]}/{slug}-75-{code}"



# Build the communes table, then attach one URL column per statistic kind:
# 'url_log' (kind "log") and 'url_pop' (kind "pop").
communes = pd.DataFrame(load_data())
for kind in ("log", "pop"):
    communes[f'url_{kind}'] = communes.apply(
        lambda row: format_url_ville_data(row, kind), axis=1
    )
communes

Unnamed: 0,name,id,postal,price_apart,price_house,population,url_log,url_pop
0,Paris 1er Arrondissement,75101,75001,12820,13952,16149,https://ville-data.com/logement/Paris-1er-Arro...,https://ville-data.com/nombre-d-habitants/Pari...
1,Paris 2e Arrondissement,75102,75002,11395,11029,21277,https://ville-data.com/logement/Paris-2e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...
2,Paris 3e Arrondissement,75103,75003,12461,12991,33651,https://ville-data.com/logement/Paris-3e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...
3,Paris 4e Arrondissement,75104,75004,13692,16863,29326,https://ville-data.com/logement/Paris-4e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...
4,Paris 5e Arrondissement,75105,75005,12804,14565,58050,https://ville-data.com/logement/Paris-5e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...
5,Paris 6e Arrondissement,75106,75006,16267,17346,40452,https://ville-data.com/logement/Paris-6e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...
6,Paris 7e Arrondissement,75107,75007,14687,15209,49300,https://ville-data.com/logement/Paris-7e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...
7,Paris 8e Arrondissement,75108,75008,12718,15020,36218,https://ville-data.com/logement/Paris-8e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...
8,Paris 9e Arrondissement,75109,75009,10984,13535,60784,https://ville-data.com/logement/Paris-9e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...
9,Paris 10e Arrondissement,75110,75010,9506,10573,83873,https://ville-data.com/logement/Paris-10e-Arro...,https://ville-data.com/nombre-d-habitants/Pari...


# Scrape housing data from the web

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Explicit-wait helpers (used below instead of a fixed sleep)
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Open the browser
driver = webdriver.Chrome()
driver.get("https://ville-data.com/")

# Consent ville-data.com cookies.
# Wait (up to 10 s) until the consent button is clickable instead of sleeping a
# fixed 2 s: a fixed delay fails when the banner shows up later and wastes time
# when it shows up sooner. A TimeoutException is raised if it never appears.
query = '//button[@class="fc-button fc-cta-consent fc-primary-button"]'
buttons = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, query))
)
buttons.click()

The chromedriver version (115.0.5790.102) detected in PATH at /opt/homebrew/bin/chromedriver might not be compatible with the detected chrome version (115.0.5790.170); currently, chromedriver 115.0.5790.170 is recommended for chrome 115.*, so it is advised to delete the driver in PATH and retry


### Scrape housing

In [3]:
from selenium.webdriver.common.by import By
import re 
import time

# An integer written with optional whitespace between digit groups, followed by
# optional whitespace; group 1 captures the number.
_NUMBER = r'(\d+(?:\s*\d+)*)\s*'


def _parse_count(text, pattern):
    """Return the integer captured by group 1 of ``pattern`` in ``text``, or None if no match."""
    match = re.search(pattern, text)
    if match is None:
        return None
    # `\s` matches any Unicode whitespace (including non-breaking spaces), so
    # removing only ASCII ' ' is not enough before int(); str.split() with no
    # argument splits on Unicode whitespace as well.
    return int(''.join(match.group(1).split()))


def _section_text(title):
    """Return the text of the first <p> inside the <div> whose id contains ``title``."""
    query = f'//div[contains(@id, "{title}")]'
    return driver.find_element(By.XPATH, query).find_element(By.TAG_NAME, 'p').text


def _section_count(title, noun):
    """Best-effort count read from the section ``title``: the number preceding ``noun``, or None."""
    try:
        return _parse_count(_section_text(title), _NUMBER + noun)
    except Exception:
        # Best effort on purpose: a missing section must not abort the whole
        # scrape. `Exception` (not a bare except) keeps Ctrl-C working.
        return None


def extract_log():
    """Scrape housing figures from the page currently loaded in the global ``driver``.

    Each figure is looked up independently; a figure that cannot be found or
    parsed is reported as None instead of raising.

    Returns:
        dict: keys 'housing', 'house', 'apartment' (one page section each) and
        'principal', 't1', 't2', 't3', 't4', 't5+' (all read from the
        "Qualité des logements" section).
    """
    # Find number of housing / house / apartment
    number_of_housing = _section_count("Nombre de logements à", 'logements')
    number_of_house = _section_count("Nombre de maisons à", 'maisons')
    number_of_apartment = _section_count("Nombre d'appartements à", 'appartements')

    # Housing quality: one paragraph holds the total and the per-size counts
    try:
        quality_text = _section_text("Qualité des logements à")
    except Exception:
        quality_text = None

    def quality_count(wording):
        """Number preceding ``wording`` in the quality paragraph, or None."""
        if quality_text is None:
            return None
        return _parse_count(quality_text, _NUMBER + wording)

    return {
        'housing': number_of_housing,
        'house': number_of_house,
        'apartment': number_of_apartment,
        'principal': quality_count('logements.*?résidence principale'),
        't1': quality_count('logements de 1 pièce'),
        't2': quality_count('logements de 2 pièces'),
        # The 3-room figure is searched with a different wording than the others
        't3': quality_count('résidences principales de 3 pièces'),
        't4': quality_count('logements de 4 pièces'),
        't5+': quality_count('logements de 5 pièces ou plus')
    }

def update_log(row):
    """Load the housing page of one commune and copy the scraped figures into ``row``.

    Args:
        row: Mapping-like object with a 'url_log' entry; it is modified in place.

    Returns:
        The same ``row`` object, with one extra entry per key returned by
        ``extract_log``.
    """
    driver.get(row['url_log'])
    # Pause one second after requesting the page, before reading it
    time.sleep(1)
    scraped = extract_log()
    for column in scraped:
        row[column] = scraped[column]
    return row


# Update values with selenium: update_log runs once per row (axis=1), i.e. one
# page load through the shared `driver` for every commune.
# `communes` is rebound to the result, which carries the scraped columns.
communes = communes.apply(update_log, axis=1)
communes.head()

Unnamed: 0,name,id,postal,price_apart,price_house,population,url_log,url_pop,housing,house,apartment,principal,t1,t2,t3,t4,t5+
0,Paris 1er Arrondissement,75101,75001,12820,13952,16149,https://ville-data.com/logement/Paris-1er-Arro...,https://ville-data.com/nombre-d-habitants/Pari...,13832.0,50,13562,9273,2360,2794,2103,1118,899
1,Paris 2e Arrondissement,75102,75002,11395,11029,21277,https://ville-data.com/logement/Paris-2e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...,17318.0,76,17055,12260,3420,4321,2430,1297,792
2,Paris 3e Arrondissement,75103,75003,12461,12991,33651,https://ville-data.com/logement/Paris-3e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...,26137.0,102,25720,19785,5715,6429,3884,2075,1682
3,Paris 4e Arrondissement,75104,75004,13692,16863,29326,https://ville-data.com/logement/Paris-4e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...,22902.0,100,22231,16187,4361,4853,3387,2051,1536
4,Paris 5e Arrondissement,75105,75005,12804,14565,58050,https://ville-data.com/logement/Paris-5e-Arron...,https://ville-data.com/nombre-d-habitants/Pari...,,242,38598,32094,8838,9350,6721,3809,3376


### Clean data

In [4]:
# Shut the browser down, not needed anymore.
# quit() ends the whole WebDriver session; close() only closes the current
# window and can leave the driver process running.
driver.quit()

# Fill no apartment and house with 0.
# The result is assigned back rather than calling fillna(inplace=True) on an
# attribute-accessed column: that chained in-place form may act on a temporary
# object and leave `communes` unchanged (pandas Copy-on-Write).
communes['apartment'] = communes['apartment'].fillna(0)
communes['house'] = communes['house'].fillna(0)

# Fill no t1, t2, t3, t4, t5+ with 0
columns = ['t1', 't2', 't3', 't4', 't5+']
communes[columns] = communes[columns].fillna(0)

# Fill no housing with sum of apartment and house
index = communes[communes.housing.isna()].index
communes.loc[index, 'housing'] = communes.loc[index, 'apartment'] + communes.loc[index, 'house']

# If no house and no apartment, split `housing` using the overall share of houses
index = communes[(communes.house == 0) & (communes.apartment == 0)].index
percent_house = communes.house.sum() / communes.housing.sum()
communes.loc[index, 'house'] = communes.loc[index, 'housing'] * percent_house
communes.loc[index, 'apartment'] = communes.loc[index, 'housing'] * (1 - percent_house)

# Convert columns to int (raises if any of them still holds a missing value)
columns = ['id', 'population', 'housing', 'house', 'apartment', 'principal', 't1', 't2', 't3', 't4', 't5+']
communes[columns] = communes[columns].astype(int)

# Per the original note, the website counts these figures for principal
# residences only. Rescale the room-count columns (t1..t5+) by
# housing / principal to estimate the counts over all housing.
columns = ['t1', 't2', 't3', 't4', 't5+']
for col in columns:
    communes[col] = (communes[col] * communes['housing'] / communes['principal']).round().astype(int)

# Drop url columns
communes = communes.drop(columns=['url_log', 'url_pop'])

communes.head()

Unnamed: 0,name,id,postal,price_apart,price_house,population,housing,house,apartment,principal,t1,t2,t3,t4,t5+
0,Paris 1er Arrondissement,75101,75001,12820,13952,16149,13832,50,13562,9273,3520,4168,3137,1668,1341
1,Paris 2e Arrondissement,75102,75002,11395,11029,21277,17318,76,17055,12260,4831,6104,3433,1832,1119
2,Paris 3e Arrondissement,75103,75003,12461,12991,33651,26137,102,25720,19785,7550,8493,5131,2741,2222
3,Paris 4e Arrondissement,75104,75004,13692,16863,29326,22902,100,22231,16187,6170,6866,4792,2902,2173
4,Paris 5e Arrondissement,75105,75005,12804,14565,58050,38840,242,38598,32094,10696,11315,8134,4610,4086


In [5]:
# Persist the cleaned table as JSON Lines (one record per line); it is read back
# later in the notebook with pd.read_json(..., lines=True).
communes.to_json('data/paris/communes-housing.json', orient='records', lines=True)

# Load DVF

In [7]:
import pandas as pd

columns = [
    'Nature mutation', 
    'Valeur fonciere',
    'Code postal',
    'Commune', 
    'Code departement', 
    'Code commune',
    'Section', 
    'No plan', 
    'Type local',
    'Surface reelle bati', 
    'Nombre pieces principales'
]

# Load dvfs (2022 down to 2018, same order as before).
# usecols: parse only the columns kept below instead of every field of each file.
# dtype: read department codes as strings so the comparison with '75' below
# does not depend on type inference.
dvf = pd.concat([
    pd.read_csv(
        f'data/dvf{year}.txt',
        sep='|',
        usecols=columns,
        dtype={'Code departement': str},
        low_memory=False,
    )
    for year in range(2022, 2017, -1)
])


# Keep apartments and houses with a known built surface, in the column order above
dvf = dvf[dvf['Type local'].isin(['Appartement', 'Maison'])]
dvf = dvf[dvf['Surface reelle bati'].notna()]
dvf = dvf.reset_index(drop=True)
dvf = dvf[columns]

# Keep only selected communes in departement
# (commune ids in `communes` are <department code> * 1000 + <commune code>)
CODE_DEP = 75
code_communes = communes['id'] - CODE_DEP * 1000
dvf = dvf[(dvf['Code departement'] == str(CODE_DEP).zfill(2)) & (dvf['Code commune'].isin(code_communes))]
dvf = dvf.reset_index(drop=True)

# Convert to int
columns = ['Code commune', 'Nombre pieces principales', 'Surface reelle bati']
dvf[columns] = dvf[columns].astype(int)

# Set nombre pieces principales to 5 for all properties with 5 rooms or more
index = dvf[dvf['Nombre pieces principales'] >= 5].index
dvf.loc[index, 'Nombre pieces principales'] = 5

# Remove Nombre de pieces principales = 0
dvf = dvf[dvf['Nombre pieces principales'] > 0]

dvf

Unnamed: 0,Nature mutation,Valeur fonciere,Code postal,Commune,Code departement,Code commune,Section,No plan,Type local,Surface reelle bati,Nombre pieces principales
0,Vente,58000000,75018.0,PARIS 18,75,118,BR,26,Appartement,20,2
1,Vente,58000000,75018.0,PARIS 18,75,118,BR,26,Appartement,25,2
2,Vente,60500000,75003.0,PARIS 03,75,103,AH,72,Appartement,42,3
3,Vente,71625000,75009.0,PARIS 09,75,109,AC,148,Appartement,69,3
4,Vente,32000000,75010.0,PARIS 10,75,110,AG,47,Appartement,33,2
...,...,...,...,...,...,...,...,...,...,...,...
205722,Vente,22000000,75004.0,PARIS 04,75,104,AQ,127,Appartement,29,1
205723,Vente,119230700,75002.0,PARIS 02,75,102,AD,118,Appartement,150,4
205724,Vente,38300000,75002.0,PARIS 02,75,102,AO,85,Appartement,34,1
205725,Adjudication,64500000,75004.0,PARIS 04,75,104,AS,74,Appartement,54,2


# Compute the average size of houses and apartments according to their number of rooms per city

In [8]:
MINIMUM_DATA = 5

# Get the average surface for each type of property
def get_mean_surface_by_type(df, minimum_data=None):
    """Mean built surface per (commune, number of rooms, property type).

    Groups backed by fewer than ``minimum_data`` rows are replaced by the mean
    of the same (number of rooms, property type) over the whole of ``df``.

    Args:
        df: DataFrame with the columns 'Code commune',
            'Nombre pieces principales', 'Type local' and 'Surface reelle bati'.
        minimum_data: Minimum number of rows for a per-commune mean to be
            kept. Defaults to the module-level ``MINIMUM_DATA``.

    Returns:
        pandas.Series: mean surface indexed by
        ('Code commune', 'Nombre pieces principales', 'Type local').
    """
    if minimum_data is None:
        minimum_data = MINIMUM_DATA

    type_keys = ['Nombre pieces principales', 'Type local']
    commune_keys = ['Code commune'] + type_keys

    # Everything is computed from the `df` argument (the previous version read
    # the global `dvf` and ignored its parameter).
    general_mean = df.groupby(type_keys)['Surface reelle bati'].mean()
    by_commune = df.groupby(commune_keys)['Surface reelle bati']
    mean_by_commune = by_commune.mean()
    count_by_commune = by_commune.size()

    # For combinations with too few rows, fall back to the general mean
    for (commune, n_pieces, local_type), count in count_by_commune.items():
        if count < minimum_data:
            mean_by_commune.loc[(commune, n_pieces, local_type)] = general_mean.loc[(n_pieces, local_type)]

    return mean_by_commune

# Get the average surface for each type of property
dfs = get_mean_surface_by_type(dvf)
# One row per commune, one column per (number of rooms, property type)
dfs = dfs.unstack(level=[1, 2])
# Column labels: first letter of the type ('M' is renamed 'H'), then 'T', then
# the number of rooms, e.g. AT2 / HT5
dfs.columns = [f"{'H' if col[1][0] == 'M' else col[1][0]}T{col[0]}" for col in dfs.columns]
dfs = dfs.reset_index()

# Fill nan with mean values of the column.
# Assigned back in one step: calling fillna(inplace=True) on dfs[column] is a
# chained in-place call that may leave `dfs` unchanged (pandas Copy-on-Write).
columns = ['AT1', 'HT1', 'AT2', 'HT2', 'AT3', 'HT3', 'AT4', 'HT4', 'AT5', 'HT5']
dfs[columns] = dfs[columns].fillna(dfs[columns].mean())

# Add the department code to the city code
dfs['Code commune'] = dfs['Code commune'] + CODE_DEP * 1000
dfs.head()

Unnamed: 0,Code commune,AT1,AT2,AT3,AT4,AT5,HT5,HT4,HT2,HT3,HT1
0,75101,23.33725,42.330239,73.004963,100.768362,168.48913,216.060207,110.335404,62.665201,74.222106,43.202778
1,75102,21.530072,42.051163,67.317789,97.612613,135.435644,216.060207,110.335404,62.665201,74.222106,43.202778
2,75103,22.178996,42.113358,66.367882,94.819307,144.461864,209.230912,110.335404,62.665201,74.222106,43.202778
3,75104,22.863681,41.471326,65.112444,94.896359,160.321739,216.060207,106.90099,62.665201,74.222106,43.202778
4,75105,21.567696,39.436296,60.780325,89.151515,134.107399,198.2,106.90099,61.041667,74.639752,43.202778


In [9]:
# Reload the saved communes table and join the per-commune mean surfaces on the
# commune code; the duplicate key column is dropped afterwards.
communes = pd.read_json('data/paris/communes-housing.json', orient='records', lines=True)
df = (
    communes
    .merge(dfs, left_on='id', right_on='Code commune')
    .drop(columns='Code commune')
)
df.head()

Unnamed: 0,name,id,postal,price_apart,price_house,population,housing,house,apartment,principal,...,AT1,AT2,AT3,AT4,AT5,HT5,HT4,HT2,HT3,HT1
0,Paris 1er Arrondissement,75101,75001,12820,13952,16149,13832,50,13562,9273,...,23.33725,42.330239,73.004963,100.768362,168.48913,216.060207,110.335404,62.665201,74.222106,43.202778
1,Paris 2e Arrondissement,75102,75002,11395,11029,21277,17318,76,17055,12260,...,21.530072,42.051163,67.317789,97.612613,135.435644,216.060207,110.335404,62.665201,74.222106,43.202778
2,Paris 3e Arrondissement,75103,75003,12461,12991,33651,26137,102,25720,19785,...,22.178996,42.113358,66.367882,94.819307,144.461864,209.230912,110.335404,62.665201,74.222106,43.202778
3,Paris 4e Arrondissement,75104,75004,13692,16863,29326,22902,100,22231,16187,...,22.863681,41.471326,65.112444,94.896359,160.321739,216.060207,106.90099,62.665201,74.222106,43.202778
4,Paris 5e Arrondissement,75105,75005,12804,14565,58050,38840,242,38598,32094,...,21.567696,39.436296,60.780325,89.151515,134.107399,198.2,106.90099,61.041667,74.639752,43.202778


In [11]:
# Save the merged table as JSON Lines; the price computation below reloads it
# from this file.
df.to_json('data/paris/communes-ready.json', orient='records', lines=True)

# Compute the price of each city

In [12]:
import pandas as pd

# Load data: the merged communes table (JSON Lines) written earlier in the
# notebook by df.to_json('data/paris/communes-ready.json', ...)
df = pd.read_json('data/paris/communes-ready.json', orient='records', lines=True)

def count_price_tx(row, x):
    """Price of all x-room dwellings of one city.

    Computes ``tx * ((apartment / principal) * ATx * price_apart
    + (house / principal) * HTx * price_house)``.

    Args:
        row: Mapping-like object with 'apartment', 'house', 'principal',
            'price_apart', 'price_house', the count column ('t1'..'t4' or
            't5+') and the surface columns 'AT<x>' / 'HT<x>'.
        x: Number of rooms (int, or a string holding an int); 5 and above
            read the 't5+' count.

    Returns:
        The computed price (numeric).
    """
    # NOTE(review): the t* columns are already rescaled by housing / principal
    # in the cleaning cell, and the shares below are again taken relative to
    # `principal` (so they can add up to more than 1) - confirm this double
    # scaling is intended.
    suffix = str(x)
    count_column = 't5+' if int(suffix) >= 5 else 't' + suffix
    dwellings = row[count_column]

    apartment_share = row['apartment'] / row['principal']
    house_share = row['house'] / row['principal']

    apartment_part = apartment_share * row['AT' + suffix] * row['price_apart']
    house_part = house_share * row['HT' + suffix] * row['price_house']

    return dwellings * (apartment_part + house_part)

def count_price(row):
    """Total price of one city: count_price_tx accumulated over 1 to 5 rooms.

    Args:
        row: The row forwarded unchanged to ``count_price_tx``.

    Returns:
        The accumulated value, starting from 0.
    """
    total = 0
    for rooms in (1, 2, 3, 4, 5):
        total = total + count_price_tx(row, rooms)
    return total

# Count price for all cities: count_price is applied to every row (axis=1) and
# stored in the new 'city_price' column, then the table is saved as JSON Lines.
df['city_price'] = df.apply(count_price, axis=1)
df.to_json('data/paris/communes-price.json', orient='records', lines=True)
df

Unnamed: 0,name,id,postal,price_apart,price_house,population,housing,house,apartment,principal,...,AT2,AT3,AT4,AT5,HT5,HT4,HT2,HT3,HT1,city_price
0,Paris 1er Arrondissement,75101,75001,12820,13952,16149,13832,50,13562,9273,...,42.330239,73.004963,100.768362,168.48913,216.060207,110.335404,62.665201,74.222106,43.202778,16614280000.0
1,Paris 2e Arrondissement,75102,75002,11395,11029,21277,17318,76,17055,12260,...,42.051163,67.317789,97.612613,135.435644,216.060207,110.335404,62.665201,74.222106,43.202778,14706190000.0
2,Paris 3e Arrondissement,75103,75003,12461,12991,33651,26137,102,25720,19785,...,42.113358,66.367882,94.819307,144.461864,209.230912,110.335404,62.665201,74.222106,43.202778,23566990000.0
3,Paris 4e Arrondissement,75104,75004,13692,16863,29326,22902,100,22231,16187,...,41.471326,65.112444,94.896359,160.321739,216.060207,106.90099,62.665201,74.222106,43.202778,25794910000.0
4,Paris 5e Arrondissement,75105,75005,12804,14565,58050,38840,242,38598,32094,...,39.436296,60.780325,89.151515,134.107399,198.2,106.90099,61.041667,74.639752,43.202778,33139590000.0
5,Paris 6e Arrondissement,75106,75006,16267,17346,40452,31033,196,30837,22447,...,42.182639,68.740705,94.314925,149.078762,220.076923,110.335404,62.665201,74.639752,43.202778,46788350000.0
6,Paris 7e Arrondissement,75107,75007,14687,15209,49300,39675,298,38336,27130,...,43.338083,72.965578,108.648883,176.7752,550.111111,106.90099,61.041667,74.639752,48.375,67081360000.0
7,Paris 8e Arrondissement,75108,75008,12718,15020,36218,27195,116,26448,17641,...,47.925147,79.97642,112.09292,178.368677,343.285714,106.90099,62.665201,74.639752,43.202778,46731570000.0
8,Paris 9e Arrondissement,75109,75009,10984,13535,60784,40620,190,39477,32057,...,40.116735,62.462678,89.714836,135.970037,209.230912,136.333333,61.041667,74.222106,43.202778,32567210000.0
9,Paris 10e Arrondissement,75110,75010,9506,10573,83873,59790,254,57989,46827,...,37.300913,61.132173,88.034142,126.249462,216.060207,110.335404,62.665201,74.222106,43.202778,36315170000.0


# Add GeoJSON cadastre to each city 

In [28]:
import geopandas as gpd
import pandas as pd

# Load GeoJSON file with geopandas
gdf = gpd.read_file('data/paris/cadastre-75-communes.json')
gdf = gdf.drop(columns=['nom', 'created', 'updated'])
gdf['id'] = gdf['id'].astype('int')

# Load JSON file with pandas
df = pd.read_json('data/paris/communes-price.json', orient='records', lines=True)

# Merge GeoJSON and JSON files (right join: one row per record of `df`)
gdf = gdf.merge(df, on='id', how='right')

# Strip the leading "Paris " from the names. The prefix is checked explicitly:
# a blind 6-character slice would truncate any name that does not start with it.
NAME_PREFIX = 'Paris '
gdf['name'] = gdf['name'].apply(
    lambda x: x[len(NAME_PREFIX):] if x.startswith(NAME_PREFIX) else x
)
gdf.head()

Unnamed: 0,id,geometry,name,postal,price_apart,price_house,population,housing,house,apartment,...,AT2,AT3,AT4,AT5,HT5,HT4,HT2,HT3,HT1,city_price
0,75101,"MULTIPOLYGON (((2.35015 48.86199, 2.35021 48.8...",1er Arrondissement,75001,12820,13952,16149,13832,50,13562,...,42.330239,73.004963,100.768362,168.48913,216.060207,110.335404,62.665201,74.222106,43.202778,16614280000.0
1,75102,"MULTIPOLYGON (((2.35095 48.86341, 2.35152 48.8...",2e Arrondissement,75002,11395,11029,21277,17318,76,17055,...,42.051163,67.317789,97.612613,135.435644,216.060207,110.335404,62.665201,74.222106,43.202778,14706190000.0
2,75103,"MULTIPOLYGON (((2.36851 48.85573, 2.36807 48.8...",3e Arrondissement,75003,12461,12991,33651,26137,102,25720,...,42.113358,66.367882,94.819307,144.461864,209.230912,110.335404,62.665201,74.222106,43.202778,23566990000.0
3,75104,"MULTIPOLYGON (((2.36443 48.84614, 2.36529 48.8...",4e Arrondissement,75004,13692,16863,29326,22902,100,22231,...,41.471326,65.112444,94.896359,160.321739,216.060207,106.90099,62.665201,74.222106,43.202778,25794910000.0
4,75105,"MULTIPOLYGON (((2.35176 48.83678, 2.35179 48.8...",5e Arrondissement,75005,12804,14565,58050,38840,242,38598,...,39.436296,60.780325,89.151515,134.107399,198.2,106.90099,61.041667,74.639752,43.202778,33139590000.0


In [154]:
import folium
import numpy as np

# Natural log of the lowest and highest 'city_price' in gdf; style_function and
# the label sizing further down normalise log prices against this range.
MIN_PRICE = np.log(min(gdf['city_price']))
MAX_PRICE = np.log(max(gdf['city_price']))

def style_function(feature):
    """Return the folium style dict of one map feature.

    The fill colour depends on which arrondissement group the feature's name
    belongs to (grey when it belongs to none); the fill opacity grows linearly
    with the log of 'city_price' between MIN_PRICE and MAX_PRICE.

    Args:
        feature: Mapping with a 'properties' mapping holding 'name' and
            'city_price'.

    Returns:
        dict: 'fillColor', 'color', 'weight', 'fillOpacity' and 'clickable'.
    """
    # (fill colour, arrondissement numbers), checked in this order; a later
    # group would override an earlier one if a number appeared in both.
    colour_groups = (
        ('#5472AE', (16, 17)),
        ('#f5aa42', (18, 19, 20, 12)),
        ('#CC4652', (1, 2, 3, 4, 5, 9, 10)),
    )

    properties = feature['properties']
    label = properties['name']

    # Default: grey and faint; grouped arrondissements get a wider opacity range
    fill, low, high = '#999999', 0.1, 0.3
    for colour, numbers in colour_groups:
        if any(label == f"{n}{'er' if n == 1 else 'e'} Arrondissement" for n in numbers):
            fill, low, high = colour, 0.3, 0.8

    # Fill opacity as a function of the log of the price
    opacity = low + (high - low) * (np.log(properties['city_price']) - MIN_PRICE) / (MAX_PRICE - MIN_PRICE)

    return {
        'fillColor': fill,              # fill colour
        'color': 'rgba(0, 0, 0, 0.2)',  # line colour
        'weight': 2,                    # line thickness
        'fillOpacity': opacity,         # fill opacity
        'clickable': False,             # if True, the area reacts to clicks
    }

def format_price(price):
    """Rewrite a price in euros to a short readable string.

    The value is divided by the largest unit it reaches (billion 'B€',
    million 'M€', thousand 'K€') and rounded to a whole number; smaller values
    are rounded and suffixed with '€'.

    Args:
        price: Amount in euros (int or float).

    Returns:
        str: e.g. '17B€', '3M€', '1K€', '500€'.
    """
    units = (
        (1_000_000_000, 'B€'),
        (1_000_000, 'M€'),
        (1_000, 'K€'),
    )
    for threshold, suffix in units:
        # `>=` so that an exact threshold uses its own unit
        # (1_000_000_000 -> '1B€', not '1000M€'; 1000 -> '1K€').
        if price >= threshold:
            return f'{int(round(price / threshold, 0))}{suffix}'
    # Always return text, including for amounts below one thousand
    return f'{price:.0f}€'


# Tooltip (hover popup) showing the 'name' field of each area
tooltip = folium.GeoJsonTooltip(
    fields=['name'], 
    sticky=False
)

# Create a map centred on the centroid of the union of all geometries, with the
# latitude lowered by 0.01. The union is computed once and reused for both
# coordinates.
map_center = gdf.geometry.unary_union.centroid
m = folium.Map(
    height=2600,
    width=4550,
    location=[map_center.y-0.01, map_center.x],
    zoom_start=13.5,
    tiles='https://cartodb-basemaps-{s}.global.ssl.fastly.net/light_nolabels/{z}/{x}/{y}.png',
    attr='&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>'
)

# Add the data to the map with folium
folium.GeoJson(
    gdf, 
    style_function=style_function,
    tooltip=tooltip
).add_to(m)

# Label font size range in px, interpolated on the log of the price
min_size_text = 26
max_size_text = 40

# Add names centered on each location
for _, row in gdf.iterrows():
    centroid = row['geometry'].centroid
    location = [centroid.y, centroid.x]
    size_text = int(min_size_text + (max_size_text - min_size_text) * (np.log(row['city_price']) - MIN_PRICE) / (MAX_PRICE - MIN_PRICE))
    # The label of the 12e Arrondissement gets an extra left offset
    folium.Marker(location, icon=folium.DivIcon(
        html=f"""
            <div style="width: 300px; transform: translate(-50%, -50%); {"margin-left: -280px; " if row['name'] == '12e Arrondissement' else ''}">
                <h5 style="font-family: 'Arial', sans-serif; font-size: {size_text}px; text-align: center;">
                    {row['name']}
                </h5>
                <h5 style="font-family: 'Arial', sans-serif; font-size: {size_text}px; text-align: center;">
                    {format_price(row['city_price'])}
                </h5>
            </div>
        """
    )).add_to(m)

# Display the map
m

In [155]:
import io
from PIL import Image

# Render the folium map `m` to PNG bytes and save them as image.png.
# NOTE(review): _to_png is an underscore-prefixed (non-public) folium method; the
# argument 5 is presumably a render delay in seconds - TODO confirm.
img_data = m._to_png(5)
img = Image.open(io.BytesIO(img_data))
img.save('image.png')