# ⚙️ **CLEANED DATA IMPORT**

In [None]:
import os
import re

import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler, MinMaxScaler, StandardScaler

# Folder (relative to this notebook) that holds the cleaned exports.
DATA_PATH = '../data/cleaned'

# File name of each cleaned dataset.
POI_FILENAME = 'poi_df_cleaned.csv'
SITE_FILENAME = 'site_df_cleaned.csv'
SALARY_FILENAME = 'salary_df_cleaned.csv'
GEOREF_FILENAME = 'georef_df_cleaned.csv'
STOCK_FILENAME = 'stock_df_cleaned.csv'
SALES_FILENAME = 'sales_df_cleaned.csv'
POPULATION_FILENAME = 'population_df_cleaned.csv'
POVERTY_FILENAME = 'poverty_df_cleaned.csv'
REAL_ESTATE_FILENAME = 'real_estate_df_cleaned.csv'


def _read_cleaned_csv(filename):
    """Read one CSV file located in DATA_PATH and return it as a DataFrame."""
    return pd.read_csv(os.path.join(DATA_PATH, filename))


# Load every dataset with pandas' default CSV parsing options.
poi_df = _read_cleaned_csv(POI_FILENAME)
site_df = _read_cleaned_csv(SITE_FILENAME)
salary_df = _read_cleaned_csv(SALARY_FILENAME)
georef_df = _read_cleaned_csv(GEOREF_FILENAME)
stock_df = _read_cleaned_csv(STOCK_FILENAME)
sales_df = _read_cleaned_csv(SALES_FILENAME)
population_df = _read_cleaned_csv(POPULATION_FILENAME)
poverty_df = _read_cleaned_csv(POVERTY_FILENAME)
real_estate_df = _read_cleaned_csv(REAL_ESTATE_FILENAME)

In [None]:
# Sanity check: print the schema (columns, dtypes, non-null counts) of every loaded DataFrame.
# DataFrame.info() writes its report itself and returns None, so it is never wrapped in
# print() (doing so only appends a stray "None" line to the output).
poi_df.info()
site_df.info()
salary_df.info()
georef_df.info()
stock_df.info()
sales_df.info()
population_df.info()
poverty_df.info()
real_estate_df.info()

In [None]:
# Scratch check: yearly total of second homes per department, displayed for "Guyane" only.
stock_df_test = (
    stock_df
    .merge(georef_df, on="municipality_code")
    .groupby(["year", "department_name"])["nb_second_home"]
    .sum()
    .reset_index()
)
filtered_df = stock_df_test.loc[stock_df_test["department_name"] == "Guyane"]
filtered_df


### CLEANING

##### DF_SALES CLEANING

In [None]:


# SALES_DF: remove exact duplicate rows.
# Author's note: this takes the frame from about 4.3M rows down to about 3.821M rows.
sales_df = sales_df.drop_duplicates()
sales_df.shape

In [None]:
# SALES_DF: confirm that no duplicate rows remain (the author recorded this check as OK).
sales_df.duplicated().sum()

In [None]:
# SALES_DF: keep only sales whose price per m² lies between 500 € and 30,000 € (both bounds included).
# The original note mentioned a 1K€ lower bound, but the bound actually applied is 500 €.
# Author's note: about 3.34M rows remain after this filter.
price_m2_in_range = sales_df['sales_price_m2'].between(500, 30000)
sales_df = sales_df[price_m2_in_range]
sales_df.shape

In [None]:
# SALES_DF: sale amounts that occur more than 10 times, with their number of occurrences.
# value_counts() is computed once and reused (the frame holds several million rows).
amount_counts = sales_df['sales_amount'].value_counts()
s2 = amount_counts.loc[amount_counts > 10]

In [None]:
# SALES_DF: keep only sales whose amount is strictly greater than 1 €.
sales_df = sales_df[sales_df['sales_amount'] > 1] # author's note: removes the 166 sales recorded at exactly 1 €
sales_df.shape

In [255]:
# SALES_DF: convert "sales_date" to datetime.
# sales_df is the result of boolean filters at this point, so the column is replaced through
# .assign() (which returns a new frame) rather than by item assignment, which can trigger
# pandas' SettingWithCopyWarning on a filtered frame.
sales_df = sales_df.assign(sales_date=pd.to_datetime(sales_df['sales_date']))

# Number of distinct municipalities left in the sales data.
sales_df["municipality_code"].nunique()

32743

##### DF_SALARY CLEANING

In [None]:
# DF_SALARY: round "avg_net_salary" to zero decimals (Series.round() default), then preview the frame.
salary_df['avg_net_salary'] = salary_df['avg_net_salary'].round()
salary_df.head()

##### DF_REAL_ESTATE CLEANING

In [None]:
# DF_REAL_ESTATE: remove missing values.
# NOTE(review): dropna(axis=1) drops every COLUMN that contains at least one null; rows are kept.
# TODO confirm that dropping columns (rather than rows) is what was intended.
real_estate_df = real_estate_df.dropna(axis=1)
# Remaining null count per column (all zeros after the drop above).
real_estate_df.isnull().sum()

##### DF_SITE CLEANING

In [None]:
# SITE_DF: extract the text enclosed in the first pair of parentheses of "name".
# Rows whose name has no parentheses (or is missing) get an empty string.
# The pattern is matched once per row through the vectorised .str accessor.
site_df['data_inside_parenthesis'] = (
    site_df['name']
    .str.extract(r'\((.*?)\)', expand=False)
    .fillna('')
)

# NOTE: the "name" column is deliberately kept. The earlier draft called
# site_df.drop(columns=["name"]) without storing the result, so it never had any effect,
# and "name" is still selected further down when site_df_department is built.

# Compare the values found in "poi" with the values extracted from the parentheses,
# to see whether they match the information carried by the "type" column.
print(site_df["poi"].value_counts())
print(site_df["data_inside_parenthesis"].value_counts().head(50))

# Next step: map the "poi" values to categories, since they are more relevant than the "type" column.

In [None]:
# SITE_DF: template dictionary with one key per distinct "poi" value (in order of first
# appearance) and a placeholder value, used as a starting point for the category mapping.
# Distinct values are read directly instead of indexing value_counts() with the whole column,
# which built a Series as long as site_df and raised a KeyError if "poi" contained a missing value.
{poi_value: "toto" for poi_value in site_df["poi"].dropna().unique()}

In [None]:
# SITE_DF: category assigned to each value of the "poi" column.
# The mapping is written category by category, then inverted into the
# {poi value: category} dictionary used for the lookup.
_POI_VALUES_BY_CATEGORY = {
    'Patrimoine': [
        '1', '2', 'wreck', 'castle', 'heritage', 'monument', 'allotments',
    ],
    'Entertainment': [
        'zoo', 'casino', 'attraction', 'theme_park', 'water_park', 'golf_course',
    ],
    'Nature': [
        'dune', 'park', 'rock', 'sand', 'beach', 'cliff', 'islet', 'ridge', 'water',
        'forest', 'geyser', 'marina', 'meadow', 'valley', 'volcano', 'wetland',
        'vineyard', 'viewpoint', 'waterfall', 'national_park', 'protected_area',
    ],
    'Culture': [
        'cinema', 'museum', 'theatre', 'cave_entrance',
    ],
}

category_dict = {
    poi_value: category
    for category, poi_values in _POI_VALUES_BY_CATEGORY.items()
    for poi_value in poi_values
}

In [None]:
# SITE_DF: add a "Category" column by looking each "poi" value up in category_dict
# ("poi" values absent from the dictionary become NaN).
site_df["Category"] = site_df["poi"].map(category_dict)
site_df

In [None]:
# Re-inspect every DataFrame after the cleaning steps (columns, dtypes, non-null counts).
for cleaned_frame in (
    poi_df,
    site_df,
    salary_df,
    georef_df,
    stock_df,
    sales_df,
    population_df,
    poverty_df,
    real_estate_df,
):
    cleaned_frame.info()

In [None]:
# Preview the first row of poi_df to recall its columns.
poi_df.head(1)

In [None]:
# Preview the first row of site_df to recall its columns.
site_df.head(1) 

In [None]:
# Preview the first row of salary_df to recall its columns.
salary_df.head(1)

In [None]:
# Preview the first row of georef_df to recall its columns.
georef_df.head(1) 

In [None]:
# Preview the first row of stock_df to recall its columns.
stock_df.head(1) 

In [None]:
# Preview the first row of sales_df to recall its columns.
sales_df.head(1)

In [None]:
# Preview the first row of population_df to recall its columns.
population_df.head(1) 

In [None]:
# Preview the first row of poverty_df to recall its columns.
poverty_df.head(1)

In [None]:
# Preview the first row of real_estate_df to recall its columns.
real_estate_df.head(1)

# 🧪 **DATA TRANSFORMATION**

### KPIS AGGREGATION BY DEPARTMENT

##### 1. TOURISM

In [None]:
# Build the tables used to score the tourism potential of each department:
# attach the geographic reference columns (department name, ...) to every site.
# The merge overwrites site_df, so it is guarded: running the cell a second time would
# otherwise merge georef_df again and produce "_x"/"_y" duplicated columns.
if "department_name" not in site_df.columns:
    site_df = site_df.merge(georef_df, on=["municipality_code"])
site_df.head(5)

In [None]:
# Keep only the columns needed for the tourism computation.
site_columns = ["poi", "name", "municipality_code", "importance", "name_reprocessed", "department_name"]
site_df_department = site_df.loc[:, site_columns]
site_df_department

In [None]:
# Total "importance" of the tourist sites of each department,
# displayed from the department with the most tourist assets to the one with the fewest.
site_importance_by_department = site_df_department.groupby("department_name")[["importance"]]
group_site = site_importance_by_department.sum()
group_site.sort_values(by="importance", ascending=False)

In [None]:
# Same computation as for the sites, this time for accommodation / holiday places:
# attach the geographic reference columns to every point of interest.
# The merge overwrites poi_df, so it is guarded: running the cell a second time would
# otherwise merge georef_df again and produce "_x"/"_y" duplicated columns.
if "department_name" not in poi_df.columns:
    poi_df = poi_df.merge(georef_df, on=["municipality_code"])
poi_df.head(5)

In [None]:
# Keep only the columns needed for the accommodation computation.
poi_columns = ["poi", "municipality_code", "importance", "department_name"]
poi_df_department = poi_df.loc[:, poi_columns]
poi_df_department


In [None]:
# Total "importance" of the accommodation / holiday places of each department,
# displayed from the best-equipped department to the least-equipped one.
poi_importance_by_department = poi_df_department.groupby("department_name")[["importance"]]
group_poi = poi_importance_by_department.sum()
group_poi.sort_values(by="importance", ascending=False)

In [None]:
# Put the two importance totals side by side, one row per department.
# Both inputs have a column named "importance", so pandas suffixes them:
# "importance_x" comes from group_poi and "importance_y" from group_site.
department_merged_df = group_poi.merge (group_site, on=["department_name"])
# An earlier draft summed the two columns into a single "somme_importance" score and dropped
# the components; that code was disabled and the two columns are kept separate here.
department_merged_df



##### 2. REAL ESTATE

In [None]:
# 2.1 Rent per municipality: keep the municipality code and the "rental_med_all" column
# (no computation happens here, the column is taken as provided by real_estate_df).
rental_med = real_estate_df [["municipality_code", "rental_med_all"]]
rental_med

In [None]:
# Display sales_df as it stands after cleaning.
sales_df

In [None]:
# Per municipality: total amount sold, total surface sold and number of sales
# (counted through "premise_type"). These totals feed the price-per-m² computation below.
municipality_aggregations = {"sales_amount": "sum", "surface": "sum", "premise_type": "count"}
sales_df_grouped = (
    sales_df
    .groupby(["municipality_code"])[list(municipality_aggregations)]
    .agg(municipality_aggregations)
)
sales_df_grouped

In [None]:
# Add the rent of each municipality (rental_med) to the per-municipality sales totals.
real_estate_grouped = pd.merge(sales_df_grouped, rental_med, on="municipality_code")
real_estate_grouped

In [None]:
# Attach the department name to each municipality code, then keep the columns used afterwards.
department_level_columns = [
    "municipality_code",
    "sales_amount",
    "surface",
    "rental_med_all",
    "department_name",
    "premise_type",
]
real_estate_department = real_estate_grouped.merge(georef_df, on="municipality_code")[department_level_columns]

In [250]:
# Display the per-municipality real-estate table with its department name.
real_estate_department

Unnamed: 0,municipality_code,sales_amount,surface,rental_med_all,department_name,premise_type,intermediate_sum
0,01001,1.362286e+07,6781.0,10.66,Ain,60,639.60
1,01002,4.067389e+06,1901.0,10.16,Ain,17,172.72
2,01004,1.852356e+08,87209.0,11.25,Ain,1062,11947.50
3,01005,3.579244e+07,13876.0,13.28,Ain,138,1832.64
4,01006,2.575955e+06,1425.0,12.70,Ain,13,165.10
...,...,...,...,...,...,...,...
31892,95676,5.974304e+06,2486.0,12.29,Val-d'Oise,23,282.67
31893,95678,1.222182e+07,3568.0,18.53,Val-d'Oise,38,704.14
31894,95680,8.569815e+07,32626.0,16.22,Val-d'Oise,471,7639.62
31895,95682,1.463606e+06,533.0,16.57,Val-d'Oise,5,82.85


In [251]:
# Price per m² of each department: total amount sold divided by total surface sold.
department_totals = real_estate_department.groupby(["department_name"])[["sales_amount", "surface"]].sum()
average_price_per_m2 = department_totals.assign(
    average_price_per_m2=department_totals["sales_amount"] / department_totals["surface"]
)
# Displayed from the cheapest department to the most expensive one.
average_price_per_m2.sort_values(by="average_price_per_m2")

Unnamed: 0_level_0,sales_amount,surface,average_price_per_m2
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Creuse,3.494798e+08,250929.0,1392.743706
Indre,9.313114e+08,643735.0,1446.731035
Nièvre,9.686520e+08,663808.0,1459.235257
Haute-Marne,6.485911e+08,444240.0,1460.001467
Meuse,8.157466e+08,550437.0,1481.998032
...,...,...,...
Corse-du-Sud,3.267670e+09,739291.0,4420.005565
Alpes-Maritimes,3.837944e+10,7959808.0,4821.653974
Val-de-Marne,1.529389e+10,2931831.0,5216.497456
Hauts-de-Seine,2.665847e+10,3573916.0,7459.175300


In [None]:
# Rent per department: average of the municipal "rental_med_all" values, weighted by the
# number of sales of each municipality ("premise_type" holds that count at this point).
# The weighted term is added through .assign() because real_estate_department was produced by
# a column selection, and item assignment on such a frame can trigger SettingWithCopyWarning.
real_estate_department = real_estate_department.assign(
    intermediate_sum=real_estate_department["rental_med_all"] * real_estate_department["premise_type"]
)
average_rental = (
    real_estate_department
    .groupby(["department_name"])[["intermediate_sum", "premise_type"]]
    .agg({"intermediate_sum": "sum", "premise_type": "sum"})
)
# Weighted average = sum(rent * number of sales) / sum(number of sales).
average_rental["average_rental"] = average_rental["intermediate_sum"] / average_rental["premise_type"]
average_rental

In [None]:
# Bring together, per department, the average rent per m² and the average purchase price per m²,
# dropping the intermediate totals that are no longer needed.
yield_calculation = (
    average_price_per_m2
    .merge(average_rental, on="department_name")
    .drop(columns=["sales_amount", "surface", "intermediate_sum", "premise_type"])
)
# Gross rental yield in percent: twelve months of rent over the purchase price.
yield_calculation["yield_rate"] = (
    yield_calculation["average_rental"] * 12 / yield_calculation["average_price_per_m2"] * 100
)
yield_calculation.sort_values(by="yield_rate", ascending=True)


In [249]:
# Rental yield information per department (average price per m², average rent, yield rate).
yield_calculation

Unnamed: 0_level_0,average_price_per_m2,average_rental,yield_rate
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ain,2635.784095,13.183580,6.002122
Aisne,1561.099105,9.919550,7.625051
Allier,1577.489243,10.015825,7.619063
Alpes-Maritimes,4821.653974,17.530595,4.362966
Alpes-de-Haute-Provence,2281.499046,11.304575,5.945867
...,...,...,...
Vendée,2401.896369,10.549698,5.270684
Vienne,1752.067837,9.783623,6.700852
Vosges,1713.030737,9.622087,6.740395
Yonne,1607.734995,9.907284,7.394714


In [None]:
# 2.2 Price evolution over time (titled "between 2018 and 2021" by the author; the analysis
# further down only keeps the years 2020 and 2021).

# Add a "year" column derived from "sales_date" (which must already be a datetime column).
sales_df.info()
# .assign() returns a new frame: sales_df is the result of earlier boolean filters, and item
# assignment on such a frame can trigger pandas' SettingWithCopyWarning.
sales_df = sales_df.assign(year=sales_df["sales_date"].dt.year)

In [None]:
# Attach the geographic reference columns (department name, ...) to every sale.
sales_info_per_department = pd.merge(sales_df, georef_df, on=["municipality_code"])
sales_info_per_department

In [None]:
# Keep only 2020 and 2021 (author's note: the only years for which all the information is available).
is_complete_year = sales_info_per_department['year'].isin([2020, 2021])
sales_info_per_department = sales_info_per_department.loc[is_complete_year]
sales_info_per_department

In [None]:
# Total amount sold and total surface sold for each (department, year) pair.
sales_by_department_and_year = sales_info_per_department.groupby(["department_name", "year"])
sales_df_per_year = sales_by_department_and_year[["sales_amount", "surface"]].sum()
sales_df_per_year

In [None]:
# Average price per m² for each (department, year): total amount divided by total surface.
sales_df_per_year = sales_df_per_year.assign(
    average_price_m2=sales_df_per_year["sales_amount"] / sales_df_per_year["surface"]
)
sales_df_per_year.head(50)

In [None]:
# Relative change of the average price per m² from one year to the next, within each department.
# The original note mentioned 2018-2021, but the sales were filtered to 2020 and 2021 earlier,
# so the value carried by a 2021 row is the 2020 -> 2021 change; the first year of each
# department has no previous value and gets NaN.
sales_df_per_year['price_m2_growth'] = sales_df_per_year.groupby('department_name')['average_price_m2'].pct_change()
sales_df_per_year


In [248]:
# Final growth table: drop the rows containing NaN (the first year of each department has
# no growth value), which leaves one row per department carrying the growth figure.
sales_df_per_year = sales_df_per_year.dropna()

# NOTE: "sales_amount" and "surface" are intentionally left in the frame. The earlier draft
# called .drop(columns=[...]) without storing the result, so it never removed anything, and
# the real-estate scoring merge further down drops these two columns itself.

# Displayed from the strongest price growth to the weakest.
sales_df_per_year.sort_values("price_m2_growth", ascending=False)


Unnamed: 0_level_0,Unnamed: 1_level_0,sales_amount,surface,average_price_m2,price_m2_growth
department_name,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Pyrénées-Atlantiques,2021,3.821283e+09,1068164.0,3577.430704,0.140719
Landes,2021,2.409002e+09,818313.0,2943.864249,0.112565
Corse-du-Sud,2021,9.380425e+08,193855.0,4838.887261,0.106444
Finistère,2021,3.056316e+09,1421428.0,2150.172909,0.099954
Maine-et-Loire,2021,2.644990e+09,1162430.0,2275.396966,0.096707
...,...,...,...,...,...
Haute-Marne,2021,1.928814e+08,129888.0,1484.982081,0.009958
Haute-Saône,2021,3.172671e+08,202655.0,1565.552673,0.008060
Cantal,2021,2.238980e+08,138821.0,1612.854279,-0.011984
La Réunion,2021,1.260015e+09,384936.0,3273.309935,-0.024756


In [247]:
# Number of vacant dwellings per department, based on the 2018 housing stock
# (the original note said 2019, but the year actually selected is 2018).
stock_df_2018 = (
    stock_df[stock_df['year'] == 2018]
    .merge(georef_df, on=["municipality_code"])
)
vacants_housing_per_department = (
    stock_df_2018
    .groupby("department_name")["nb_vacants_housing"]
    .sum()
    .to_frame()
)
vacants_housing_per_department

Unnamed: 0_level_0,nb_vacants_housing
department_name,Unnamed: 1_level_1
Ain,25849
Aisne,26001
Allier,30479
Alpes-Maritimes,64643
Alpes-de-Haute-Provence,10768
...,...
Vendée,21818
Vienne,23264
Vosges,24154
Yonne,23303


In [None]:
# Display the 2018 housing stock joined with the geographic reference columns.
stock_df_2018


In [None]:
# 2.3 Housing tax ("taxe d'habitation") on secondary homes, per department.

TAX_FILENAME = 'taxe_habitation.xlsx'

# Read the Excel file and rename its 'RÉGIONS' column to "department_name",
# the key used to join the other department-level tables.
tax_df = (
    pd.read_excel(os.path.join(DATA_PATH, TAX_FILENAME))
    .rename(columns={'RÉGIONS': 'department_name'})
)
tax_df

##### 3. SECONDARY HOME

In [148]:
# 3.1 Average surface of the dwellings sold, per department.

# Attach the department of each sale through the geographic reference table.
georef_department_columns = georef_df[['municipality_code', 'department_code', 'department_name']]
real_estate_sales_dep = pd.merge(sales_df, georef_department_columns, on='municipality_code')

# Mean surface of the sales of each department
# (despite its name, the resulting frame holds one row per department).
surface_by_department = real_estate_sales_dep.groupby('department_name')['surface']
average_surface_municipality = surface_by_department.mean().reset_index()

average_surface_municipality

Unnamed: 0,department_name,surface
0,Ain,95.491503
1,Aisne,91.956053
2,Allier,85.308972
3,Alpes-Maritimes,66.283678
4,Alpes-de-Haute-Provence,73.283742
...,...,...
92,Vendée,84.926212
93,Vienne,87.584873
94,Vosges,88.986998
95,Yonne,88.105673


In [149]:
# 3.2 Evolution of the number of secondary homes per department between 2008 and 2018.


def _second_homes_in(year):
    """Return municipality_code and the second-home count of `year`, the count column being suffixed with the year."""
    yearly_stock = stock_df.loc[stock_df['year'] == year, ['municipality_code', 'nb_second_home']]
    return yearly_stock.rename(columns={'nb_second_home': f'nb_second_home_{year}'})


# One frame per reference year, each with its own year-specific column name.
housing_2008 = _second_homes_in(2008)
housing_2018 = _second_homes_in(2018)

# Put both years side by side per municipality, then attach the department of each municipality.
secondary_home_rate_comparison = (
    housing_2008
    .merge(housing_2018, on='municipality_code')
    .merge(
        georef_df[['municipality_code', 'department_code', 'department_name']],
        on='municipality_code',
    )
)

# Total second homes per department for each year, and relative change in percent.
second_home_year_columns = ['nb_second_home_2008', 'nb_second_home_2018']
secondary_home_rate_evolution_department = (
    secondary_home_rate_comparison.groupby(['department_name'])[second_home_year_columns].sum()
)
homes_2008 = secondary_home_rate_evolution_department["nb_second_home_2008"]
homes_2018 = secondary_home_rate_evolution_department["nb_second_home_2018"]
secondary_home_rate_evolution_department["evolution_secondary_homes"] = ((homes_2018 - homes_2008) / homes_2008) * 100
secondary_home_rate_evolution_department.head(50)

Unnamed: 0_level_0,nb_second_home_2008,nb_second_home_2018,evolution_secondary_homes
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ain,16960,17574,3.620283
Aisne,10185,9393,-7.776141
Allier,14797,15032,1.58816
Alpes-Maritimes,166798,195108,16.972626
Alpes-de-Haute-Provence,38403,40199,4.676718
Ardennes,5070,5003,-1.321499
Ardèche,35985,36779,2.206475
Ariège,25341,27107,6.968944
Aube,6930,7825,12.914863
Aude,61952,67047,8.224109


In [150]:
# 3.3 Housing tax ("taxe d'habitation") on secondary homes, per department.
# NOTE(review): this cell repeats the load already done in section 2.3 (same file, same rename).

TAX_FILENAME = 'taxe_habitation.xlsx'

tax_df = pd.read_excel(os.path.join(DATA_PATH, TAX_FILENAME))
tax_df.head()
# Rename the 'RÉGIONS' column to "department_name", the key used to join the other tables.
tax_df = tax_df.rename(columns={'RÉGIONS': 'department_name'})
tax_df

Unnamed: 0,department_name,Taxe d'habitation moyenne en 2023,Nombre d'avis d'impôt
0,Ain,847,23 000
1,Aisne,732,14 000
2,Allier,801,18 000
3,Alpes-de-Haute-Provence,551,35 000
4,Hautes-Alpes,567,52 000
...,...,...,...
96,Guadeloupe,1080,26 000
97,Martinique,986,18 000
98,Guyane,766,9 000
99,La Réunion,1182,15 000


##### 4. LIFE QUALITY

In [None]:
# 4.1 Health professionals per 100,000 inhabitants, per department, in 2023.
# DATA_PATH is set again here to the same value as in the import cell.
DATA_PATH = '../data/cleaned'
HEALTH_FILENAME = 'health_df_cleaned.csv'

health_df = pd.read_csv(os.path.join(DATA_PATH, HEALTH_FILENAME))
health_df.head(50)

In [None]:
# 4.2 Crime rate per 1,000 inhabitants, per department, in 2020.

CRIMINALITY_FILENAME = 'criminality_df_cleaned.csv'

criminality_df = pd.read_csv(os.path.join(DATA_PATH, CRIMINALITY_FILENAME))

# The rate is stored as text with a decimal comma: switch to a decimal point, then parse as numbers.
rates_with_decimal_point = criminality_df['criminality_per_1000'].str.replace(',', '.')
criminality_df['criminality_per_1000'] = pd.to_numeric(rates_with_decimal_point)

# One row per department in the geographic reference (first value found for each column).
georef_aggregated = georef_df.groupby('department_name').first().reset_index()

# Mean rate per department, completed with the department code looked up by department name.
rate_by_department = criminality_df.groupby('department_name')['criminality_per_1000']
criminality_aggregated = rate_by_department.mean().reset_index()
criminality_per_department = pd.merge(
    criminality_aggregated,
    georef_aggregated[['department_name', 'department_code']],
    on='department_name',
)

# Show the last 50 rows of the resulting frame.
criminality_per_department.tail(50)

In [None]:
# 4.3 Sunshine per year, per department.

SUNNY_FILENAME = 'heures_ensoleillement.xlsx'

# Read the Excel file and rename the department column to "department_name",
# the key used to join the other department-level tables.
sunny_df = (
    pd.read_excel(os.path.join(DATA_PATH, SUNNY_FILENAME))
    .rename(columns={'Départements Français et Dom Tom': 'department_name'})
)

# Drop the department number and the ranking, which are not used for the scoring.
sunny_df_per_department = sunny_df.drop(columns=["Num dép", "Classement"])
sunny_df_per_department.tail(50)

# SCORING

##### 1. TOURISM

In [133]:
# Tourism scoring inputs: copy the two suffixed importance columns under explicit names
# (importance_x -> ranking_hosting, importance_y -> ranking_touristic_sites),
# then keep only the renamed columns in the scoring frame.
ranking_column_by_source = {
    "importance_x": "ranking_hosting",
    "importance_y": "ranking_touristic_sites",
}
for source_column, ranking_column in ranking_column_by_source.items():
    department_merged_df[ranking_column] = department_merged_df[source_column]

calculation_tourism_scoring = department_merged_df.drop(columns=list(ranking_column_by_source))
calculation_tourism_scoring

Unnamed: 0_level_0,ranking_hosting,ranking_touristic_sites
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ain,11.423180,34.061847
Aisne,4.937153,15.687868
Allier,8.386644,21.401418
Alpes-Maritimes,32.802552,32.967630
Alpes-de-Haute-Provence,11.293578,14.722338
...,...,...
Vendée,27.816510,32.287377
Vienne,8.205227,21.203995
Vosges,12.007083,14.973759
Yonne,7.211247,16.595920


##### 2. REAL ESTATE

In [233]:
# Combine the department-level real-estate indicators needed for the scoring:
# rental yield, then price growth, then number of vacant dwellings.
real_estate_scoring_merge_1 = pd.merge(yield_calculation, sales_df_per_year, on="department_name")
real_estate_scoring_merge_2 = pd.merge(
    real_estate_scoring_merge_1,
    vacants_housing_per_department,
    on="department_name",
)
real_estate_scoring_merge_2

Unnamed: 0_level_0,average_price_per_m2,average_rental,yield_rate,sales_amount,surface,average_price_m2,price_m2_growth,nb_vacants_housing
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Ain,2635.784095,13.183580,6.002122,2.802408e+09,990520.0,2829.229384,0.072189,25849
Aisne,1561.099105,9.919550,7.625051,8.480408e+08,523582.0,1619.690449,0.033483,26001
Allier,1577.489243,10.015825,7.619063,6.105036e+08,372329.0,1639.688435,0.029527,30479
Alpes-Maritimes,4821.653974,17.530595,4.362966,1.050612e+10,2027161.0,5182.678071,0.078167,64643
Alpes-de-Haute-Provence,2281.499046,11.304575,5.945867,5.404214e+08,227226.0,2378.343326,0.029776,10768
...,...,...,...,...,...,...,...,...
Vendée,2401.896369,10.549698,5.270684,3.281171e+09,1290262.0,2543.026785,0.091690,21818
Vienne,1752.067837,9.783623,6.700852,1.002048e+09,549295.0,1824.243054,0.066504,23264
Vosges,1713.030737,9.622087,6.740395,7.157524e+08,405495.0,1765.132472,0.039378,24154
Yonne,1607.734995,9.907284,7.394714,7.649412e+08,464540.0,1646.663833,0.041092,23303


In [234]:
# Add the housing-tax table, then remove the columns that do not take part in the scoring.
columns_not_scored = [
    "average_price_per_m2",
    "sales_amount",
    "surface",
    "average_price_m2",
    "average_rental",
    "Nombre d'avis d'impôt",
]
real_estate_scoring_merge_3 = (
    real_estate_scoring_merge_2
    .merge(tax_df, on="department_name")
    .drop(columns=columns_not_scored)
)
real_estate_scoring_merge_3

Unnamed: 0,department_name,yield_rate,price_m2_growth,nb_vacants_housing,Taxe d'habitation moyenne en 2023
0,Ain,6.002122,0.072189,25849,847
1,Aisne,7.625051,0.033483,26001,732
2,Allier,7.619063,0.029527,30479,801
3,Alpes-Maritimes,4.362966,0.078167,64643,1686
4,Alpes-de-Haute-Provence,5.945867,0.029776,10768,551
...,...,...,...,...,...
88,Vendée,5.270684,0.091690,21818,759
89,Vienne,6.700852,0.066504,23264,746
90,Vosges,6.740395,0.039378,24154,611
91,Yonne,7.394714,0.041092,23303,682


In [257]:
# Inputs of the real-estate scoring.
# This binds a second name to the same DataFrame object (no copy is made).
calculation_real_estate_scoring = real_estate_scoring_merge_3
# Not the last expression of the cell, so this preview is not displayed.
calculation_real_estate_scoring.head(50)
# Displays the tax column converted to float; the result is not stored, so the frame is unchanged.
calculation_real_estate_scoring["Taxe d'habitation moyenne en 2023"].astype(float)


0      847.0
1      732.0
2      801.0
3     1686.0
4      551.0
       ...  
88     759.0
89     746.0
90     611.0
91     682.0
92    1443.0
Name: Taxe d'habitation moyenne en 2023, Length: 93, dtype: float64

In [258]:
# The housing tax is left out of the real-estate scoring: remove its column.
tax_column = "Taxe d'habitation moyenne en 2023"
calculation_real_estate_scoring = calculation_real_estate_scoring.drop(columns=[tax_column])
calculation_real_estate_scoring

Unnamed: 0,department_name,yield_rate,price_m2_growth,nb_vacants_housing
0,Ain,6.002122,0.072189,25849
1,Aisne,7.625051,0.033483,26001
2,Allier,7.619063,0.029527,30479
3,Alpes-Maritimes,4.362966,0.078167,64643
4,Alpes-de-Haute-Provence,5.945867,0.029776,10768
...,...,...,...,...
88,Vendée,5.270684,0.091690,21818
89,Vienne,6.700852,0.066504,23264
90,Vosges,6.740395,0.039378,24154
91,Yonne,7.394714,0.041092,23303


In [259]:
# Display the real-estate scoring inputs (yield rate, price growth, vacant dwellings).
calculation_real_estate_scoring

Unnamed: 0,department_name,yield_rate,price_m2_growth,nb_vacants_housing
0,Ain,6.002122,0.072189,25849
1,Aisne,7.625051,0.033483,26001
2,Allier,7.619063,0.029527,30479
3,Alpes-Maritimes,4.362966,0.078167,64643
4,Alpes-de-Haute-Provence,5.945867,0.029776,10768
...,...,...,...,...
88,Vendée,5.270684,0.091690,21818
89,Vienne,6.700852,0.066504,23264
90,Vosges,6.740395,0.039378,24154
91,Yonne,7.394714,0.041092,23303


##### 3. SECONDARY HOME

In [155]:
# Combine the department-level indicators needed for the secondary-home scoring:
# average surface sold, evolution of the number of secondary homes, then housing tax.
calculation_secondary_home_scoring_merge_1 = pd.merge(
    average_surface_municipality,
    secondary_home_rate_evolution_department,
    on="department_name",
)
calculation_secondary_home_scoring_merge_2 = pd.merge(
    calculation_secondary_home_scoring_merge_1,
    tax_df,
    on="department_name",
)

# Remove the raw counts, which are not scored directly.
columns_not_scored = ["nb_second_home_2008", "nb_second_home_2018", "Nombre d'avis d'impôt"]
calculation_secondary_home_scoring = calculation_secondary_home_scoring_merge_2.drop(columns=columns_not_scored)
calculation_secondary_home_scoring

Unnamed: 0,department_name,surface,evolution_secondary_homes,Taxe d'habitation moyenne en 2023
0,Ain,95.491503,3.620283,847
1,Aisne,91.956053,-7.776141,732
2,Allier,85.308972,1.588160,801
3,Alpes-Maritimes,66.283678,16.972626,1686
4,Alpes-de-Haute-Provence,73.283742,4.676718,551
...,...,...,...,...
92,Vendée,84.926212,-1.718634,759
93,Vienne,87.584873,15.877164,746
94,Vosges,88.986998,12.625825,611
95,Yonne,88.105673,-9.106951,682


##### 4. LIFE QUALITY

In [None]:
# Merge the three life-quality tables on the department name:
# sunshine and crime rate are inner-joined, then the health table is outer-joined, so
# departments present in only one side of that last join get missing values.
# Missing values are then replaced by 0 (another default could be used if needed), and the
# redundant department-code columns and the detailed health columns are removed.
life_quality_df = (
    sunny_df_per_department
    .merge(criminality_per_department, on='department_name', how='inner')
    .merge(health_df, on='department_name', how='outer')
    .fillna(0)
    .drop(columns=[
        'department_code_x',
        'department_code_y',
        "ensemble des médecins",
        "dont généralistes",
        "dont spécialistes",
        "chirurg. dentistes",
        "pharm.",
    ])
)

calculation_life_quality_scoring = pd.DataFrame(life_quality_df)
calculation_life_quality_scoring.tail(50)

In [None]:
# Manually fill the sunshine and crime-rate values of three rows that were empty
# (the author did not know why they were empty).
# NOTE(review): the merge cell replaces missing values by 0 after an outer join with the health
# table, so these rows are presumably departments whose name did not match across the source
# tables — TODO confirm by comparing the department names of the three sources.
# NOTE(review): rows are addressed by their index labels (21, 22, 92), which depend on the row
# order produced by the merge; verify the labels still point at the intended departments
# whenever the inputs change.
calculation_life_quality_scoring.at[21,"criminality_per_1000"]=36.72
calculation_life_quality_scoring.at[22,"criminality_per_1000"]=30.92
calculation_life_quality_scoring.at[21,"Ensoleillement (heures)"]=1789
calculation_life_quality_scoring.at[22,"Ensoleillement (heures)"]=1512
calculation_life_quality_scoring.at[92,"Ensoleillement (heures)"]=1719
calculation_life_quality_scoring.at[92,"criminality_per_1000"]=43.79

calculation_life_quality_scoring.head(50)

# **SCALING**

##### 1. TOURISM

In [189]:
# The scikit-learn scalers are imported in the notebook's import cell, together with the other
# dependencies, so that a fresh "Restart & Run All" surfaces every requirement up front.
# Min-max scaling maps every KPI to the [0, 1] range so that the KPIs can be combined.
scaler = MinMaxScaler()

# Scale the numeric tourism KPIs and rebuild a DataFrame that keeps the same
# column names and the same index as the scoring inputs.
calculation_tourism_scoring_numeric = calculation_tourism_scoring.select_dtypes(include="number")
df_scaled_tourism = pd.DataFrame(
    scaler.fit_transform(calculation_tourism_scoring_numeric),
    columns=calculation_tourism_scoring_numeric.columns,
    index=calculation_tourism_scoring.index,
)
df_scaled_tourism.head(50)

Unnamed: 0_level_0,ranking_hosting,ranking_touristic_sites
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ain,0.142733,0.627787
Aisne,0.048254,0.257672
Allier,0.098501,0.372763
Alpes-Maritimes,0.454155,0.605746
Alpes-de-Haute-Provence,0.140845,0.238223
Ardennes,0.027919,0.092048
Ardèche,0.437353,0.368949
Ariège,0.106684,0.14551
Aube,0.056686,0.093658
Aude,0.190056,0.336015


In [190]:
#cleaning des différents KPI
df_scaled_tourism['ranking_hosting'] = round(df_scaled_tourism['ranking_hosting'], 2)
df_scaled_tourism['ranking_touristic_sites'] = round(df_scaled_tourism['ranking_touristic_sites'], 2)
df_scaled_tourism

Unnamed: 0_level_0,ranking_hosting,ranking_touristic_sites
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ain,0.14,0.63
Aisne,0.05,0.26
Allier,0.10,0.37
Alpes-Maritimes,0.45,0.61
Alpes-de-Haute-Provence,0.14,0.24
...,...,...
Vendée,0.38,0.59
Vienne,0.10,0.37
Vosges,0.15,0.24
Yonne,0.08,0.28


##### 2. REAL ESTATE

In [191]:
# Min-max scale the numeric real-estate KPIs to the [0, 1] range for the scoring.
calculation_real_estate_scoring_numeric = calculation_real_estate_scoring.select_dtypes(include="number")
scaler = MinMaxScaler()
df_scaled_real_estate = scaler.fit_transform(calculation_real_estate_scoring_numeric)

# Label the scaled rows with the department names of the frame that was actually scaled.
# The labels were previously taken from real_estate_scoring_merge_2, an earlier frame built
# before the merge with the tax table: that merge can drop departments, which makes the two
# frames differ in length (DataFrame construction then fails) or in row order (wrong labels).
# The other scaling cells already label their rows this way.
df_scaled_real_estate = pd.DataFrame(
    df_scaled_real_estate,
    index=calculation_real_estate_scoring["department_name"],
    columns=calculation_real_estate_scoring_numeric.columns,
)
df_scaled_real_estate

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


Unnamed: 0_level_0,yield_rate,price_m2_growth,nb_vacants_housing,Rentability_score,Growth_score,Vacancy_score
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ain,0.633355,0.551218,0.197095,,,
Aisne,0.991120,0.297747,0.198424,,,
Allier,0.989800,0.271845,0.237583,,,
Alpes-Maritimes,0.272014,0.590368,0.536334,,,
Alpes-de-Haute-Provence,0.620955,0.273472,0.065217,,,
...,...,...,...,...,...,...
Vendée,0.472114,0.678925,0.161845,,,
Vienne,0.787386,0.513990,0.174490,,,
Vosges,0.796103,0.336356,0.182273,,,
Yonne,0.940344,0.347576,0.174831,,,


In [192]:
#cleaning des différents KPI
df_scaled_real_estate['yield_rate'] = round(df_scaled_real_estate['yield_rate'], 2)
df_scaled_real_estate['price_m2_growth'] = round(df_scaled_real_estate['price_m2_growth'], 2)
df_scaled_real_estate['nb_vacants_housing'] = round(df_scaled_real_estate['nb_vacants_housing'], 2)
df_scaled_real_estate

Unnamed: 0_level_0,yield_rate,price_m2_growth,nb_vacants_housing,Rentability_score,Growth_score,Vacancy_score
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ain,0.63,0.55,0.20,,,
Aisne,0.99,0.30,0.20,,,
Allier,0.99,0.27,0.24,,,
Alpes-Maritimes,0.27,0.59,0.54,,,
Alpes-de-Haute-Provence,0.62,0.27,0.07,,,
...,...,...,...,...,...,...
Vendée,0.47,0.68,0.16,,,
Vienne,0.79,0.51,0.17,,,
Vosges,0.80,0.34,0.18,,,
Yonne,0.94,0.35,0.17,,,


In [193]:
# Show the last 50 rows of the (unscaled) real-estate scoring inputs.
calculation_real_estate_scoring.tail(50)

Unnamed: 0,department_name,yield_rate,price_m2_growth,nb_vacants_housing,Rentability_score,Growth_score,Vacancy_score
43,Hérault,5.334787,0.067913,53371,,,
44,Ille-et-Vilaine,5.500305,0.088263,37096,,,
45,Indre,7.204237,0.047201,18514,,,
46,Indre-et-Loire,5.952506,0.054554,28098,,,
47,Isère,6.016491,0.076904,50307,,,
48,Jura,7.049789,0.048344,14794,,,
49,Landes,5.059902,0.112565,17224,,,
50,Loir-et-Cher,6.923747,0.05764,20023,,,
51,Loire,6.506399,0.038213,41323,,,
52,Loire-Atlantique,4.964178,0.088958,41992,,,


##### 3. SECONDARY HOME

In [194]:
# Scale the numeric secondary-home KPIs with the scaler object defined earlier (it is refitted
# on this data), and label the rows with the department names.
calculation_secondary_home_scoring_numeric = calculation_secondary_home_scoring.select_dtypes(include="number")
df_scaled_secondary_home = pd.DataFrame(
    scaler.fit_transform(calculation_secondary_home_scoring_numeric),
    columns=calculation_secondary_home_scoring_numeric.columns,
    index=calculation_secondary_home_scoring["department_name"],
)
df_scaled_secondary_home.head(50)

Unnamed: 0_level_0,surface,evolution_secondary_homes,Taxe d'habitation moyenne en 2023
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ain,0.849569,0.13895,0.213806
Aisne,0.777615,0.014529,0.158677
Allier,0.642331,0.116764,0.191755
Alpes-Maritimes,0.255123,0.284725,0.616012
Alpes-de-Haute-Provence,0.39759,0.150484,0.071908
Ardennes,0.776964,0.084998,0.084372
Ardèche,0.679231,0.123515,0.05465
Ariège,0.61961,0.175509,0.053212
Aube,0.626639,0.240424,0.139981
Aude,0.489839,0.189213,0.088207


In [195]:
# KPI clean-up: round every scaled indicator to 2 decimals.
# Caution: the columns are overwritten in place, and the tax column is replaced by
# its complement (1 - value), so running this cell a second time flips it back.
df_scaled_secondary_home['surface'] = df_scaled_secondary_home['surface'].round(2)
df_scaled_secondary_home['evolution_secondary_homes'] = (
    df_scaled_secondary_home['evolution_secondary_homes'].round(2)
)
# Complement of the scaled tax: a lower scaled tax yields a higher value.
df_scaled_secondary_home["Taxe d'habitation moyenne en 2023"] = (
    1 - df_scaled_secondary_home["Taxe d'habitation moyenne en 2023"]
).round(2)
df_scaled_secondary_home

Unnamed: 0_level_0,surface,evolution_secondary_homes,Taxe d'habitation moyenne en 2023
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ain,0.85,0.14,0.79
Aisne,0.78,0.01,0.84
Allier,0.64,0.12,0.81
Alpes-Maritimes,0.26,0.28,0.38
Alpes-de-Haute-Provence,0.40,0.15,0.93
...,...,...,...
Vendée,0.63,0.08,0.83
Vienne,0.69,0.27,0.83
Vosges,0.72,0.24,0.90
Yonne,0.70,0.00,0.87


##### 4. LIFE QUALITY

In [196]:
# Scale the numeric KPIs so they can be combined into a score.
# `scaler` comes from outside this cell and is re-fitted here on this table only.
calculation_life_quality_scoring_numeric = calculation_life_quality_scoring.select_dtypes(include="number")

# Wrap the scaled array back into a DataFrame: same column names, indexed by department name.
df_scaled_life_quality = pd.DataFrame(
    scaler.fit_transform(calculation_life_quality_scoring_numeric),
    columns=calculation_life_quality_scoring_numeric.columns,
    index=calculation_life_quality_scoring["department_name"],
)
df_scaled_life_quality.tail(50)

Unnamed: 0_level_0,Ensoleillement (heures),criminality_per_1000,ensemble des médecins.1
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Isère,0.721171,0.524738,0.310388
Jura,0.674402,0.352332,0.196496
La Réunion,0.0,0.0,0.34418
Landes,0.661192,0.352535,0.257822
Loir-et-Cher,0.620136,0.311084,0.177722
Loire,0.71653,0.418368,0.320401
Loire-Atlantique,0.603356,0.53632,0.332916
Loiret,0.610496,0.401199,0.177722
Lot,0.73331,0.312913,0.214018
Lot-et-Garonne,0.698679,0.354973,0.201502


In [197]:
# KPI clean-up: derive the three final indicator columns, each rounded to 2 decimals.
df_scaled_life_quality['Ensoleillement'] = df_scaled_life_quality['Ensoleillement (heures)'].round(2)
# Complement of the scaled crime rate: less crime yields a higher value.
df_scaled_life_quality['Criminality'] = (1 - df_scaled_life_quality['criminality_per_1000']).round(2)
df_scaled_life_quality['Health'] = df_scaled_life_quality['ensemble des médecins.1'].round(2)

# Drop the source columns now that the renamed indicators exist.
# Re-running this cell raises a KeyError because these columns are gone afterwards.
df_scaled_life_quality = df_scaled_life_quality.drop(
    ["Ensoleillement (heures)", "criminality_per_1000", "ensemble des médecins.1"],
    axis=1,
)
df_scaled_life_quality

Unnamed: 0_level_0,Ensoleillement,Criminality,Health
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ain,0.69,0.64,0.11
Aisne,0.57,0.58,0.15
Allier,0.66,0.64,0.23
Alpes-Maritimes,0.95,0.43,0.47
Alpes-de-Haute-Provence,0.93,0.55,0.25
...,...,...,...
Vendée,0.63,0.69,0.17
Vienne,0.67,0.62,0.33
Vosges,0.62,0.65,0.20
Yonne,0.63,0.54,0.15


# **NORMALIZATION**

##### 1. TOURISM

In [218]:
# Tourism scores: each scaled ranking is multiplied by 10, and the global score is
# the plain average of the two, rounded to 1 decimal.
tourism_scoring = pd.DataFrame()
tourism_scoring['Hosting_score'] = df_scaled_tourism['ranking_hosting'].mul(10)
tourism_scoring['Touristic_sites_score'] = df_scaled_tourism['ranking_touristic_sites'].mul(10)
tourism_scoring['Global_tourism_score'] = (
    (tourism_scoring['Hosting_score'] + tourism_scoring['Touristic_sites_score']) / 2
).round(1)

# Five best departments by global tourism score
tourism_scoring.sort_values(by="Global_tourism_score", ascending=False).head(5)

Unnamed: 0_level_0,Hosting_score,Touristic_sites_score,Global_tourism_score
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Paris,10.0,7.4,8.7
Gironde,3.4,10.0,6.7
Finistère,4.0,8.8,6.4
Savoie,5.2,7.3,6.2
Bouches-du-Rhône,3.3,8.9,6.1


##### 2. REAL ESTATE

In [219]:
# Real-estate scores: each scaled KPI is multiplied by 10.
real_estate_scoring = pd.DataFrame()
real_estate_scoring['Rentability_score'] = df_scaled_real_estate['yield_rate'].mul(10)
real_estate_scoring['Growth_score'] = df_scaled_real_estate['price_m2_growth'].mul(10)
real_estate_scoring['Vacancy_score'] = df_scaled_real_estate['nb_vacants_housing'].mul(10)

# Weighted average with weights 1 / 1 / 0.5 (vacancy counts half), hence the 2.5 divisor.
real_estate_scoring['Global_real_estate_score'] = (
    (
        real_estate_scoring['Rentability_score']
        + real_estate_scoring['Growth_score']
        + (real_estate_scoring['Vacancy_score'] / 2)
    )
    / 2.5
).round(1)

# Five best departments by global real-estate score
real_estate_scoring.sort_values(by="Global_real_estate_score", ascending=False).head(5)

Unnamed: 0_level_0,Rentability_score,Growth_score,Vacancy_score,Global_real_estate_score
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Finistère,7.1,7.3,3.3,6.4
Nord,6.9,4.3,7.9,6.1
Eure,8.6,5.5,1.9,6.0
Aube,9.8,4.4,1.1,5.9
Meurthe-et-Moselle,8.5,4.8,2.8,5.9


##### 3. SECONDARY HOME

In [220]:
# Secondary-home scores: each scaled KPI is multiplied by 10.
secondary_home_scoring = pd.DataFrame()
secondary_home_scoring['Surface_score'] = df_scaled_secondary_home['surface'].mul(10)
secondary_home_scoring['Secondary_home_growth_score'] = df_scaled_secondary_home['evolution_secondary_homes'].mul(10)
secondary_home_scoring['Tax_score'] = df_scaled_secondary_home["Taxe d'habitation moyenne en 2023"].mul(10)

# Weighted average with weights 0.5 / 1 / 1 (surface counts half), hence the 2.5 divisor.
secondary_home_scoring['Global_secondary_home_score'] = (
    (
        (secondary_home_scoring['Surface_score'] / 2)
        + secondary_home_scoring['Secondary_home_growth_score']
        + secondary_home_scoring['Tax_score']
    )
    / 2.5
).round(1)

# Five best departments by global secondary-home score
secondary_home_scoring.sort_values(by="Global_secondary_home_score", ascending=False).head(5)

Unnamed: 0_level_0,Surface_score,Secondary_home_growth_score,Tax_score,Global_secondary_home_score
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Guadeloupe,4.7,9.8,6.7,7.5
La Réunion,4.3,10.0,6.3,7.4
Martinique,4.2,8.4,7.2,7.1
Guyane,5.0,6.6,8.3,7.0
Nord,6.7,6.6,6.7,6.7


##### 4. LIFE QUALITY

In [221]:
# Life-quality scores: each cleaned indicator is multiplied by 10.
life_quality_scoring = pd.DataFrame()
life_quality_scoring['Sun_score'] = df_scaled_life_quality['Ensoleillement'].mul(10)
life_quality_scoring['Safety_score'] = df_scaled_life_quality['Criminality'].mul(10)
life_quality_scoring['Health_score'] = df_scaled_life_quality["Health"].mul(10)

# Weighted average with weights 0.5 / 1 / 1 (sunshine counts half), hence the 2.5 divisor.
life_quality_scoring['Global_life_quality_score'] = (
    (
        (life_quality_scoring['Sun_score']) / 2
        + life_quality_scoring['Safety_score']
        + life_quality_scoring['Health_score']
    )
    / 2.5
).round(1)

# Five best departments by global life-quality score
life_quality_scoring.sort_values(by="Global_life_quality_score", ascending=False).head(5)

Unnamed: 0_level_0,Sun_score,Safety_score,Health_score,Global_life_quality_score
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Hautes-Alpes,8.7,6.3,5.2,6.3
Pyrénées-Atlantiques,6.7,6.7,4.3,5.7
Corse-du-Sud,9.7,6.0,3.3,5.7
Haute-Corse,9.0,6.8,2.6,5.6
Haute-Vienne,6.6,6.9,3.8,5.6


##### 5. GLOBAL SCORE

In [227]:
# Join the four thematic score tables on the department name.
# These are default (inner) merges: a department missing from any table is dropped.
global_scoring_merge_1 = pd.merge(tourism_scoring, real_estate_scoring, on="department_name")
global_scoring_merge_2 = pd.merge(global_scoring_merge_1, secondary_home_scoring, on="department_name")
global_scoring_merge_3 = pd.merge(global_scoring_merge_2, life_quality_scoring, on="department_name")

# Overall score: unweighted mean of the four global sub-scores, rounded to 1 decimal.
global_scoring_merge_3["Global_scoring"] = (
    (
        global_scoring_merge_3["Global_tourism_score"]
        + global_scoring_merge_3["Global_real_estate_score"]
        + global_scoring_merge_3["Global_secondary_home_score"]
        + global_scoring_merge_3["Global_life_quality_score"]
    )
    / 4
).round(1)

# global_scoring_table is another name for the same object, not a copy.
global_scoring_table = global_scoring_merge_3
global_scoring_table.sort_values(by="Global_scoring", ascending=False).head(25)

Unnamed: 0_level_0,Hosting_score,Touristic_sites_score,Global_tourism_score,Rentability_score,Growth_score,Vacancy_score,Global_real_estate_score,Surface_score,Secondary_home_growth_score,Tax_score,Global_secondary_home_score,Sun_score,Safety_score,Health_score,Global_life_quality_score,Global_scoring
department_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Finistère,4.0,8.8,6.4,7.1,7.3,3.3,6.4,6.8,2.2,7.2,5.1,5.3,6.6,3.5,5.1,5.8
Nord,2.4,6.9,4.6,6.9,4.3,7.9,6.1,6.7,6.6,6.7,6.7,5.8,5.7,3.4,4.8,5.6
Bouches-du-Rhône,3.3,8.9,6.1,4.2,4.6,6.5,4.8,4.1,6.1,4.9,5.2,10.0,3.6,4.6,5.3,5.4
Gironde,3.4,10.0,6.7,3.4,5.7,4.5,4.5,6.5,3.2,6.4,5.1,7.1,4.3,4.4,4.9,5.3
Maine-et-Loire,1.4,6.7,4.0,6.4,7.1,2.1,5.8,7.4,3.1,8.0,5.9,6.0,6.6,3.3,5.2,5.2
Pyrénées-Atlantiques,4.2,5.2,4.7,3.4,10.0,2.6,5.9,6.0,2.9,5.7,4.6,6.7,6.7,4.3,5.7,5.2
Isère,3.4,8.3,5.8,6.4,5.8,4.1,5.7,5.5,1.9,7.6,4.9,7.2,4.8,3.1,4.6,5.2
Morbihan,3.5,7.1,5.3,4.9,7.0,2.7,5.3,7.0,1.6,7.9,5.2,6.2,6.7,3.1,5.2,5.2
Côte-d'Or,1.6,8.0,4.8,6.8,4.4,1.9,4.9,5.5,2.0,8.4,5.3,6.4,6.3,4.1,5.4,5.1
Hérault,4.7,6.5,5.6,4.9,5.2,4.4,4.9,3.6,1.5,7.1,4.2,9.3,4.3,4.4,5.3,5.0


In [265]:
# Add to the "global score" table all the per-department information that was used to compute the Global Score.
# Each step is a default (inner) merge on "department_name" with one of the calculation_* input tables.
# NOTE(review): an earlier displayed output of calculation_real_estate_scoring shows Rentability_score /
# Growth_score / Vacancy_score columns; if they are still present, this merge would suffix the clashing
# names with _x / _y — confirm the state of that table before relying on the column names.
global_scoring_info_merge_1 = global_scoring_table.merge(calculation_real_estate_scoring, on="department_name")
global_scoring_info_merge_2 = global_scoring_info_merge_1.merge(calculation_secondary_home_scoring, on="department_name")
global_scoring_info_merge_3 = global_scoring_info_merge_2.merge(calculation_life_quality_scoring, on="department_name")
# NOTE(review): the next line is cut off mid-string in this copy of the notebook (unterminated literal),
# and the four bare expressions after it look like remnants of similarly truncated statements.
# Restore the original assignments before running this cell — TODO confirm against the source notebook.
global_scoring_info_merge_3["Rental_profitability_rate_
global_scoring_info_merge_3
global_scoring_info_merge_3
global_scoring_info_merge_3
global_scoring_info_merge_3

Unnamed: 0,department_name,Hosting_score,Touristic_sites_score,Global_tourism_score,Rentability_score,Growth_score,Vacancy_score,Global_real_estate_score,Surface_score,Secondary_home_growth_score,...,Global_scoring,yield_rate,price_m2_growth,nb_vacants_housing,surface,evolution_secondary_homes,Taxe d'habitation moyenne en 2023,Ensoleillement (heures),criminality_per_1000,ensemble des médecins.1
0,Ain,1.4,6.3,3.8,6.3,5.5,2.0,5.1,8.5,1.4,...,4.7,6.002122,0.072189,25849,95.491503,3.620283,847,1928.0,35.00,174
1,Aisne,0.5,2.6,1.6,9.9,3.0,2.0,5.6,7.8,0.1,...,4.1,7.625051,0.033483,26001,91.956053,-7.776141,732,1609.0,41.71,211
2,Allier,1.0,3.7,2.4,9.9,2.7,2.4,5.5,6.4,1.2,...,4.4,7.619063,0.029527,30479,85.308972,1.588160,801,1857.0,35.12,275
3,Alpes-Maritimes,4.5,6.1,5.3,2.7,5.9,5.4,4.5,2.6,2.8,...,4.6,4.362966,0.078167,64643,66.283678,16.972626,1686,2668.0,55.66,461
4,Alpes-de-Haute-Provence,1.4,2.4,1.9,6.2,2.7,0.7,3.7,4.0,1.5,...,4.0,5.945867,0.029776,10768,73.283742,4.676718,551,2596.0,44.57,291
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,Vendée,3.8,5.9,4.8,4.7,6.8,1.6,4.9,6.3,0.8,...,4.8,5.270684,0.091690,21818,84.926212,-1.718634,759,1756.0,30.51,221
89,Vienne,1.0,3.7,2.4,7.9,5.1,1.7,5.5,6.9,2.7,...,4.7,6.700852,0.066504,23264,87.584873,15.877164,746,1867.0,37.29,352
90,Vosges,1.5,2.4,2.0,8.0,3.4,1.8,4.9,7.2,2.4,...,4.4,6.740395,0.039378,24154,88.986998,12.625825,611,1743.0,34.07,248
91,Yonne,0.8,2.8,1.8,9.4,3.5,1.7,5.5,7.0,0.0,...,4.0,7.394714,0.041092,23303,88.105673,-9.106951,682,1759.0,44.85,212
