# Outil de prédiction des prix immobiliers

## Setup

In [2]:
import sys
sys.path.append("../")

In [1]:
%load_ext autoreload
%autoreload 2

In [54]:
from lib.model.loader import load_model

from lib.dataset import (
    load_dvfplus, 
    prepare_dataset,
    prepare_dummies, 
) 

from lib.dataset.utils import extract_int_from_string

In [67]:
import numpy as np 
import pandas as pd 

In [91]:
# Show every column when a DataFrame is displayed (wide frames are not truncated).
pd.set_option("display.max_columns", None)
# Turn off pandas' chained-assignment check, which silences SettingWithCopyWarning.
pd.options.mode.chained_assignment = None

In [11]:
# Relative paths used by the notebook: BACKUP_DIR is the base of the model
# path given to load_model, DATA_DIR is passed to load_dvfplus as zip_dir.
BACKUP_DIR = "../backup/"
DATA_DIR = "../data/"

## Attributs utilisateur

In [39]:
# Description of the property supplied by the user.
# "property_type" and "geo_area" are forwarded to load_dvfplus and load_model;
# the address fields are used to look the property up in the dataset.

user_args = {
    "property_type": "flats", 
    "geo_area": "Paris", 
    "adresse_numero": 15, 
    "adresse_nom_voie": "Rue de la Convention", 
    "code_postal": 75015,
    "nombre_pieces_principales": 2
}

## Dataset, modèle & features

In [94]:
# Load the dataset for the geo area and property type selected in user_args.

df = load_dvfplus(
    zip_dir=DATA_DIR, 
    zip_name="dvf+", 
    geo_area=user_args["geo_area"],
    property_type=user_args["property_type"]
)

  df = load_dvfplus(


In [95]:
# Cast the zip code and street number columns to the nullable "Int32" dtype
# so they can be compared with the integer values held in user_args.

for int_column in ("code_postal", "adresse_numero"):
    df[int_column] = df[int_column].astype("Int32")

In [96]:
# Preview the first rows of the loaded dataset.
df.head()

Unnamed: 0,id_mutation,date_mutation,numero_disposition,nature_mutation,valeur_fonciere,adresse_numero,adresse_suffixe,adresse_nom_voie,adresse_code_voie,code_postal,code_commune,nom_commune,code_departement,ancien_code_commune,ancien_nom_commune,id_parcelle,ancien_id_parcelle,numero_volume,lot1_numero,lot1_surface_carrez,lot2_numero,lot2_surface_carrez,lot3_numero,lot3_surface_carrez,lot4_numero,lot4_surface_carrez,lot5_numero,lot5_surface_carrez,nombre_lots,code_type_local,type_local,surface_reelle_bati,nombre_pieces_principales,code_nature_culture,nature_culture,code_nature_culture_speciale,nature_culture_speciale,surface_terrain,longitude,latitude,dependance,code_region,nom_departement,nom_region,degre_densite,pop,annee,trimestre,mois,jour,parcelle_id,code_iris,periode_construction,periode_construction_max,hauteur_mean,altitude_sol_mean,conso_ener_mean,estim_ges_mean,conso_ener_std,estim_ges_std,conso_ener_min,estim_ges_min,conso_ener_max,estim_ges_max,ratio_ges_conso,enr_solaire_photovoltaique,enr_solaire_thermique_(chauffage)solaire_thermique_(ecs),enr_solaire_thermique_(ecs+chauffage),baie_u,mur_u_ext,pb_u,ph_u,mur_pos_isol_ext,prc_s_vitree_ext,presence_balcon,presence_climatisation,baie_orientation_indetermine,baie_orientation_nord,baie_orientation_ouest,baie_orientation_est,baie_orientation_horizontale,baie_orientation_est_ou_ouest,baie_orientation_sud,distance_batiment_historique_plus_proche,qpv,nb_lot_garpark,nb_lot_tot,nb_log,nb_lot_tertiaire,alea_argiles,alea_radon
0,2017-731653,2017-07-04,1,Vente,571110.0,16,,RUE SAINT GILLES,8627,75003,75103,Paris 3e Arrondissement,75,,,75103000AM0035,,,12.0,,36.0,49.51,,,,,,,2,2.0,Appartement,42.0,3.0,,,,,,2.366404,48.858043,0,11,Paris,Île-de-France,Paris,34788.0,2017,3,7,4,75103000AM0035,751031102.0,<1948,AVANT_1949,16.0,35.0,269.0,13.0,208.5,4.4,69.0,8.0,485.0,16.0,0.23,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,323.0,0.0,0.0,26.0,23.0,26.0,,Faible
1,2017-731655,2017-07-04,1,Vente,1578150.0,66,,RUE DE LISBONNE,5708,75008,75108,Paris 8e Arrondissement,75,,,75108000CO0044,,,61.0,139.02,62.0,,66.0,,,,,,3,2.0,Appartement,135.0,4.0,,,,,,2.306998,48.877579,0,11,Paris,Île-de-France,Paris,36453.0,2017,3,7,4,75108000CO0044,751083203.0,<1948,AVANT_1949,20.0,47.0,197.0,40.3,21.3,3.2,181.9,38.0,212.0,42.6,0.18,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,388.0,0.0,2.0,26.0,23.0,23.0,,Faible
2,2017-731657,2017-07-05,1,Vente,2810880.0,27,,RUE MARBEUF,5993,75008,75108,Paris 8e Arrondissement,75,,,75108000AR0060,,,93.0,219.6,,,,,,,,,1,2.0,Appartement,220.0,6.0,,,,,,2.304234,48.869357,1,11,Paris,Île-de-France,Paris,36453.0,2017,3,7,5,75108000AR0060,751082903.0,<1948,AVANT_1949,24.0,37.0,1604.3,51.6,1283.7,41.8,183.0,5.0,2721.0,88.0,0.03,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,271.0,0.0,0.0,164.0,59.0,104.0,,Faible
3,2017-731658,2017-07-03,1,Vente,130000.0,9,,RUE DES INNOCENTS,4718,75001,75101,Paris 1er Arrondissement,75,,,75101000AO0081,,,114.0,,19.0,,,,,,,,2,2.0,Appartement,14.0,1.0,,,,,,2.347393,48.860387,0,11,Paris,Île-de-France,Paris,16252.0,2017,3,7,3,75101000AO0081,751010201.0,<1948,AVANT_1949,19.0,36.0,231.8,7.3,136.2,4.2,53.5,1.7,395.0,12.0,0.03,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,443.0,0.0,0.0,147.0,97.0,105.0,,Faible
4,2017-731661,2017-07-05,1,Vente,485000.0,30,,RUE BEAUBOURG,759,75003,75103,Paris 3e Arrondissement,75,,,75103000AT0078,,,4.0,50.07,,,,,,,,,1,2.0,Appartement,38.0,3.0,,,,,,2.354479,48.861846,0,11,Paris,Île-de-France,Paris,34788.0,2017,3,7,5,75103000AT0078,751031202.0,<1948,AVANT_1949,18.0,36.0,291.4,37.7,135.4,20.9,200.0,14.0,447.0,53.2,0.23,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,101.0,0.0,6.0,60.0,40.0,45.0,,Faible


In [97]:
# Load the model matching the user's property type and geo area.
# The returned object is read by key further down (e.g. "feature_names").
# NOTE(review): BACKUP_DIR already ends with "/", so this path contains "//".

model_loader = load_model(
    path=f"{BACKUP_DIR}/models",
    estimator_name="XGBRegressor", 
    version=0, 
    property_type=user_args["property_type"],
    geo_area=user_args["geo_area"]
)

In [98]:
# One NaN placeholder per feature name listed by the loaded model.
X = dict.fromkeys(model_loader["feature_names"], np.nan)
X

{'l_altitude_sol_mean': nan,
 'l_valeur_fonciere_ma90': nan,
 'l_estim_ges_min': nan,
 'l_valeur_fonciere_ma14': nan,
 'nb_lot_tot': nan,
 'l_ratio_ges_conso': nan,
 'nb_log': nan,
 'l_distance_batiment_historique_plus_proche': nan,
 'nb_lot_garpark': nan,
 'nombre_pieces_principales_3': nan,
 'nombre_pieces_principales_1': nan,
 'nb_lot_tertiaire': nan,
 'l_hauteur_mean': nan,
 'l_valeur_fonciere_ma7': nan,
 'nombre_pieces_principales_4': nan,
 'l_conso_ener_min': nan,
 'l_estim_ges_max': nan,
 'l_surface_reelle_bati': nan,
 'l_conso_ener_max': nan,
 'l_estim_ges_std': nan,
 'l_valeur_fonciere_ma30': nan,
 'l_conso_ener_mean': nan,
 'dependance': nan,
 'l_conso_ener_std': nan,
 'nombre_pieces_principales_5': nan,
 'arrondissement_16': nan,
 'l_estim_ges_mean': nan}

## Préparation des données

In [99]:
# Try to find the user's address in the dataset

from pandas.core.frame import DataFrame
from typing import Union

def find_adress_in_df(df, user_args) -> Union[DataFrame, None]:
    """Look up the rows of ``df`` located at the address given in ``user_args``.

    A row matches when its ``adresse_numero`` and ``code_postal`` equal the
    user's values and its ``adresse_nom_voie`` equals the user's street name,
    compared case-insensitively.

    Args:
        df: frame exposing ``adresse_numero``, ``adresse_nom_voie`` and
            ``code_postal`` columns.
        user_args: mapping holding the same three keys.

    Returns:
        The matching rows, or ``None`` when no row matches.
    """
    same_number = df.adresse_numero == user_args["adresse_numero"]
    same_street = (
        df.adresse_nom_voie.str.lower() == user_args["adresse_nom_voie"].lower()
    )
    same_zip = df.code_postal == user_args["code_postal"]

    matches = df[same_number & same_street & same_zip]

    if len(matches):
        return matches
    return None

# Rows of df recorded at the user's address.
# NOTE(review): find_adress_in_df returns None when nothing matches; the next
# cell calls result.copy() and would fail in that case.
result = find_adress_in_df(df, user_args)
result

Unnamed: 0,id_mutation,date_mutation,numero_disposition,nature_mutation,valeur_fonciere,adresse_numero,adresse_suffixe,adresse_nom_voie,adresse_code_voie,code_postal,code_commune,nom_commune,code_departement,ancien_code_commune,ancien_nom_commune,id_parcelle,ancien_id_parcelle,numero_volume,lot1_numero,lot1_surface_carrez,lot2_numero,lot2_surface_carrez,lot3_numero,lot3_surface_carrez,lot4_numero,lot4_surface_carrez,lot5_numero,lot5_surface_carrez,nombre_lots,code_type_local,type_local,surface_reelle_bati,nombre_pieces_principales,code_nature_culture,nature_culture,code_nature_culture_speciale,nature_culture_speciale,surface_terrain,longitude,latitude,dependance,code_region,nom_departement,nom_region,degre_densite,pop,annee,trimestre,mois,jour,parcelle_id,code_iris,periode_construction,periode_construction_max,hauteur_mean,altitude_sol_mean,conso_ener_mean,estim_ges_mean,conso_ener_std,estim_ges_std,conso_ener_min,estim_ges_min,conso_ener_max,estim_ges_max,ratio_ges_conso,enr_solaire_photovoltaique,enr_solaire_thermique_(chauffage)solaire_thermique_(ecs),enr_solaire_thermique_(ecs+chauffage),baie_u,mur_u_ext,pb_u,ph_u,mur_pos_isol_ext,prc_s_vitree_ext,presence_balcon,presence_climatisation,baie_orientation_indetermine,baie_orientation_nord,baie_orientation_ouest,baie_orientation_est,baie_orientation_horizontale,baie_orientation_est_ou_ouest,baie_orientation_sud,distance_batiment_historique_plus_proche,qpv,nb_lot_garpark,nb_lot_tot,nb_log,nb_lot_tertiaire,alea_argiles,alea_radon
66455,2019-1525239,2019-06-19,1,Vente,952450.0,15,,RUE DE LA CONVENTION,2300.0,75015,75115,Paris 15e Arrondissement,75,,,75115000FX0001,,,11,92.4,37.0,,,,,,,,2,2.0,Appartement,92.0,3.0,,,,,,2.279226,48.845253,0,11,Paris,Île-de-France,Paris,233484.0,2019,2,6,19,75115000FX0001,751156013.0,<1948,AVANT_1949,30.0,27.0,297.7,32.6,174.4,29.0,148.3,2.7,675.3,99.4,0.17,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,282.0,0.0,0.0,166.0,164.0,88.0,,Faible


In [78]:
from typing import Dict

def format_user_data(result: DataFrame, user_args: Dict) -> DataFrame:
    """Build a copy of ``result`` that only carries the user's own values.

    Every column whose name is a key of ``user_args`` is filled with the
    corresponding user value; every other column is filled with NaN.
    The index and column order of ``result`` are kept, and ``result`` itself
    is left untouched.

    Args:
        result: frame whose columns and index define the output layout.
        user_args: mapping from column name to the value supplied by the user.

    Returns:
        A new frame with the same index and columns as ``result``.
    """
    formatted = result.copy()

    for column in result.columns:
        # Fall back to NaN for columns the user did not provide.
        formatted[column] = user_args.get(column, np.nan)

    return formatted

# Replace the matched rows by rows that only carry the user's values
# (all other columns become NaN). Note that `result` is rebound here.
result = format_user_data(result, user_args)
result

Unnamed: 0,id_mutation,date_mutation,numero_disposition,nature_mutation,valeur_fonciere,adresse_numero,adresse_suffixe,adresse_nom_voie,adresse_code_voie,code_postal,code_commune,nom_commune,code_departement,ancien_code_commune,ancien_nom_commune,id_parcelle,ancien_id_parcelle,numero_volume,lot1_numero,lot1_surface_carrez,lot2_numero,lot2_surface_carrez,lot3_numero,lot3_surface_carrez,lot4_numero,lot4_surface_carrez,lot5_numero,lot5_surface_carrez,nombre_lots,code_type_local,type_local,surface_reelle_bati,nombre_pieces_principales,code_nature_culture,nature_culture,code_nature_culture_speciale,nature_culture_speciale,surface_terrain,longitude,latitude,dependance,code_region,nom_departement,nom_region,degre_densite,pop,annee,trimestre,mois,jour,parcelle_id,code_iris,periode_construction,periode_construction_max,hauteur_mean,altitude_sol_mean,conso_ener_mean,estim_ges_mean,conso_ener_std,estim_ges_std,conso_ener_min,estim_ges_min,conso_ener_max,estim_ges_max,ratio_ges_conso,enr_solaire_photovoltaique,enr_solaire_thermique_(chauffage)solaire_thermique_(ecs),enr_solaire_thermique_(ecs+chauffage),baie_u,mur_u_ext,pb_u,ph_u,mur_pos_isol_ext,prc_s_vitree_ext,presence_balcon,presence_climatisation,baie_orientation_indetermine,baie_orientation_nord,baie_orientation_ouest,baie_orientation_est,baie_orientation_horizontale,baie_orientation_est_ou_ouest,baie_orientation_sud,distance_batiment_historique_plus_proche,qpv,nb_lot_garpark,nb_lot_tot,nb_log,nb_lot_tertiaire,alea_argiles,alea_radon
66455,,,,,,15,,Rue de la Convention,,75015,,,,,,,,,,,,,,,,,,,,,,,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [100]:
from typing import List

def get_movav_windows(feature_names: List) -> List:
    """Collect the moving-average windows encoded in the feature names.

    Only names starting with ``"l_valeur_fonciere_ma"`` are considered. For
    each of them the value returned by ``extract_int_from_string`` is cast to
    ``int``. The output follows the order of ``feature_names``.

    Args:
        feature_names: feature names to scan.

    Returns:
        The list of window sizes, possibly empty.
    """
    windows = []

    for name in feature_names:
        if not name.startswith("l_valeur_fonciere_ma"):
            continue
        windows.append(int(extract_int_from_string(name)))

    return windows

# Window sizes of the moving-average features listed by the loaded model.
mov_av_windows = get_movav_windows(model_loader["feature_names"])
print(mov_av_windows)

[90, 14, 7, 30]


In [102]:
# add result to df for moving average calculation 

def add_result_to_df(result: DataFrame, df: DataFrame) -> DataFrame:
    """Return a new frame made of ``df`` followed by the rows of ``result``.

    Row labels of both inputs are kept as they are, so a label present in
    both frames appears twice in the output.
    """
    return pd.concat(objs=[df, result])

# Dataset extended with the user's row(s), kept separate from df.
df_test = add_result_to_df(result, df)
df_test

Unnamed: 0,id_mutation,date_mutation,numero_disposition,nature_mutation,valeur_fonciere,adresse_numero,adresse_suffixe,adresse_nom_voie,adresse_code_voie,code_postal,code_commune,nom_commune,code_departement,ancien_code_commune,ancien_nom_commune,id_parcelle,ancien_id_parcelle,numero_volume,lot1_numero,lot1_surface_carrez,lot2_numero,lot2_surface_carrez,lot3_numero,lot3_surface_carrez,lot4_numero,lot4_surface_carrez,lot5_numero,lot5_surface_carrez,nombre_lots,code_type_local,type_local,surface_reelle_bati,nombre_pieces_principales,code_nature_culture,nature_culture,code_nature_culture_speciale,nature_culture_speciale,surface_terrain,longitude,latitude,dependance,code_region,nom_departement,nom_region,degre_densite,pop,annee,trimestre,mois,jour,parcelle_id,code_iris,periode_construction,periode_construction_max,hauteur_mean,altitude_sol_mean,conso_ener_mean,estim_ges_mean,conso_ener_std,estim_ges_std,conso_ener_min,estim_ges_min,conso_ener_max,estim_ges_max,ratio_ges_conso,enr_solaire_photovoltaique,enr_solaire_thermique_(chauffage)solaire_thermique_(ecs),enr_solaire_thermique_(ecs+chauffage),baie_u,mur_u_ext,pb_u,ph_u,mur_pos_isol_ext,prc_s_vitree_ext,presence_balcon,presence_climatisation,baie_orientation_indetermine,baie_orientation_nord,baie_orientation_ouest,baie_orientation_est,baie_orientation_horizontale,baie_orientation_est_ou_ouest,baie_orientation_sud,distance_batiment_historique_plus_proche,qpv,nb_lot_garpark,nb_lot_tot,nb_log,nb_lot_tertiaire,alea_argiles,alea_radon
0,2017-731653,2017-07-04,1,Vente,571110.0,16,,RUE SAINT GILLES,8627,75003,75103,Paris 3e Arrondissement,75,,,75103000AM0035,,,12.0,,36.0,49.51,,,,,,,2,2.0,Appartement,42.0,3.0,,,,,,2.366404,48.858043,0,11,Paris,Île-de-France,Paris,34788.0,2017,3,7,4,75103000AM0035,751031102.0,<1948,AVANT_1949,16.0,35.0,269.0,13.0,208.5,4.4,69.0,8.0,485.0,16.0,0.23,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,323.0,0.0,0.0,26.0,23.00,26.0,,Faible
1,2017-731655,2017-07-04,1,Vente,1578150.0,66,,RUE DE LISBONNE,5708,75008,75108,Paris 8e Arrondissement,75,,,75108000CO0044,,,61.0,139.02,62.0,,66.0,,,,,,3,2.0,Appartement,135.0,4.0,,,,,,2.306998,48.877579,0,11,Paris,Île-de-France,Paris,36453.0,2017,3,7,4,75108000CO0044,751083203.0,<1948,AVANT_1949,20.0,47.0,197.0,40.3,21.3,3.2,181.9,38.0,212.0,42.6,0.18,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,388.0,0.0,2.0,26.0,23.00,23.0,,Faible
2,2017-731657,2017-07-05,1,Vente,2810880.0,27,,RUE MARBEUF,5993,75008,75108,Paris 8e Arrondissement,75,,,75108000AR0060,,,93.0,219.60,,,,,,,,,1,2.0,Appartement,220.0,6.0,,,,,,2.304234,48.869357,1,11,Paris,Île-de-France,Paris,36453.0,2017,3,7,5,75108000AR0060,751082903.0,<1948,AVANT_1949,24.0,37.0,1604.3,51.6,1283.7,41.8,183.0,5.0,2721.0,88.0,0.03,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,271.0,0.0,0.0,164.0,59.00,104.0,,Faible
3,2017-731658,2017-07-03,1,Vente,130000.0,9,,RUE DES INNOCENTS,4718,75001,75101,Paris 1er Arrondissement,75,,,75101000AO0081,,,114.0,,19.0,,,,,,,,2,2.0,Appartement,14.0,1.0,,,,,,2.347393,48.860387,0,11,Paris,Île-de-France,Paris,16252.0,2017,3,7,3,75101000AO0081,751010201.0,<1948,AVANT_1949,19.0,36.0,231.8,7.3,136.2,4.2,53.5,1.7,395.0,12.0,0.03,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,443.0,0.0,0.0,147.0,97.00,105.0,,Faible
4,2017-731661,2017-07-05,1,Vente,485000.0,30,,RUE BEAUBOURG,759,75003,75103,Paris 3e Arrondissement,75,,,75103000AT0078,,,4.0,50.07,,,,,,,,,1,2.0,Appartement,38.0,3.0,,,,,,2.354479,48.861846,0,11,Paris,Île-de-France,Paris,34788.0,2017,3,7,5,75103000AT0078,751031202.0,<1948,AVANT_1949,18.0,36.0,291.4,37.7,135.4,20.9,200.0,14.0,447.0,53.2,0.23,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,101.0,0.0,6.0,60.0,40.00,45.0,,Faible
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160753,2022-537266,2022-06-22,1,Vente,330375.0,10,,RUE DU CHEVALERET,1990,75013,75113,Paris 13e Arrondissement,75,,,75113000CI0017,,,22.0,,37.0,,4.0,,,,,,3,2.0,Appartement,40.0,2.0,,,,,,2.378447,48.825798,0,11,Paris,Île-de-France,Paris,181552.0,2022,2,6,22,75113000CI0017,751135013.0,<1948,,7.0,34.0,678.3,21.7,297.0,9.5,382.0,12.0,976.0,31.0,0.03,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,495.0,0.0,0.0,29.9,21.42,0.0,,Faible
160754,2022-537267,2022-06-29,1,Vente,2801244.0,3,,VLA JOCELYN,4983,75016,75116,Paris 16e Arrondissement,75,,,75116000EB0007,,,313.0,183.05,,,,,,,,,1,2.0,Appartement,172.0,6.0,,,,,,2.276055,48.865299,1,11,Paris,Île-de-France,Paris,165446.0,2022,2,6,29,75116000EB0007,751166303.0,<1948,AVANT_1949,26.0,53.0,211.6,45.1,93.6,23.5,92.8,3.0,416.8,107.1,0.23,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,429.0,0.0,31.0,308.0,65.00,65.0,,Faible
160755,2022-537271,2022-06-21,1,Vente,423000.0,43,,RUE DES FAVORITES,3536,75015,75115,Paris 15e Arrondissement,75,,,75115000AC0055,,,16.0,,48.0,,,,,,,,2,2.0,Appartement,43.0,2.0,,,,,,2.305379,48.837454,1,11,Paris,Île-de-France,Paris,233484.0,2022,2,6,21,75115000AC0055,751155731.0,bad inf,DE_1975_A_1993,18.0,49.0,219.9,64.1,38.0,32.9,163.0,31.0,240.6,92.4,0.19,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,498.0,0.0,37.0,100.0,36.00,0.0,,Faible
160756,2022-537272,2022-06-30,1,Vente,104650.0,50,,RUE LEON FROT,5554,75011,75111,Paris 11e Arrondissement,75,,,75111000BS0234,,,31.0,,,,,,,,,,1,2.0,Appartement,15.0,1.0,,,,,,2.386354,48.856079,0,11,Paris,Île-de-France,Paris,147017.0,2022,2,6,30,75111000BS0234,751114302.0,<1948,AVANT_1949,20.0,45.0,419.0,13.7,157.2,4.9,238.0,8.0,521.0,17.0,,0.0,0.0,0.0,,,,,ITI,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,390.0,0.0,0.0,38.0,36.00,38.0,,Faible
