https://www.data.gouv.fr/datasets/demandes-de-valeurs-foncieres-geolocalisees/?utm_source=chatgpt.com

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Affiche toutes les lignes
pd.set_option("display.max_rows", None)

# Affiche toutes les colonnes
pd.set_option("display.max_columns", None)

# Choisis le nombre de caractère par colonnes
pd.set_option("display.max_colwidth", 50)

In [2]:
df_path = "full.csv"

df = pd.read_csv(df_path, low_memory=False)

In [3]:
col_to_keep = ["valeur_fonciere","id_mutation", "numero_disposition", "id_parcelle", "code_commune", "nom_commune", "code_postal", "type_local", "nature_culture", "nature_culture_speciale", 
               "nombre_lots", "nombre_pieces_principales", "surface_reelle_bati", "surface_terrain"]

df = df[col_to_keep].head(10000)

In [4]:
# garder uniquement les maisons et les appartements
type_local_to_keep = ['Maison', 'Appartement']
df = df[(df['type_local'].isin(type_local_to_keep))]

In [5]:
# Vue d"ensemble (shape, dtypes)
def quick_overview(df, name):
    print(f"\n{name.upper()} SHAPE: {df.shape}")
    display(df.head())
    print(f"{name.upper()} Dtypes: \n{df.dtypes.value_counts()}")
    display(df.dtypes)
    #display(df.describe().T)
    display(df.describe(include="all").T)
    
quick_overview(df, "Dataset")


DATASET SHAPE: (2220, 14)


Unnamed: 0,valeur_fonciere,id_mutation,numero_disposition,id_parcelle,code_commune,nom_commune,code_postal,type_local,nature_culture,nature_culture_speciale,nombre_lots,nombre_pieces_principales,surface_reelle_bati,surface_terrain
6,329500.0,2024-4,1,01173000AI0551,1173,Gex,1170.0,Appartement,,,2,4.0,89.0,
78,94500.0,2024-7,1,01202000AC0198,1202,Lagnieu,1150.0,Appartement,sols,,0,3.0,74.0,65.0
80,94500.0,2024-7,1,01202000AC0198,1202,Lagnieu,1150.0,Appartement,sols,,0,2.0,32.0,65.0
81,220000.0,2024-8,1,010560000C2523,1056,Boyeux-Saint-Jérôme,1640.0,Maison,sols,,0,1.0,40.0,488.0
82,220000.0,2024-8,1,010560000C2524,1056,Boyeux-Saint-Jérôme,1640.0,Maison,sols,,0,2.0,80.0,858.0


DATASET Dtypes: 
object     7
float64    5
int64      2
Name: count, dtype: int64


valeur_fonciere              float64
id_mutation                   object
numero_disposition             int64
id_parcelle                   object
code_commune                  object
nom_commune                   object
code_postal                  float64
type_local                    object
nature_culture                object
nature_culture_speciale       object
nombre_lots                    int64
nombre_pieces_principales    float64
surface_reelle_bati          float64
surface_terrain              float64
dtype: object

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
valeur_fonciere,2218.0,,,,279058.779788,235226.764634,1.0,134062.5,226498.5,340000.0,2300000.0
id_mutation,2220.0,1694.0,2024-1800,80.0,,,,,,,
numero_disposition,2220.0,,,,1.106306,0.334922,1.0,1.0,1.0,1.0,4.0
id_parcelle,2220.0,1587.0,01283000AO0410,80.0,,,,,,,
code_commune,2220.0,307.0,01053,155.0,,,,,,,
nom_commune,2220.0,307.0,Bourg-en-Bresse,155.0,,,,,,,
code_postal,2220.0,,,,1317.511712,224.516225,1000.0,1140.0,1260.0,1480.0,1990.0
type_local,2220.0,2.0,Maison,1260.0,,,,,,,
nature_culture,1516.0,8.0,sols,1334.0,,,,,,,
nature_culture_speciale,17.0,3.0,Jardin d'agrément,10.0,,,,,,,


In [6]:
df.isna().sum()

valeur_fonciere                 2
id_mutation                     0
numero_disposition              0
id_parcelle                     0
code_commune                    0
nom_commune                     0
code_postal                     0
type_local                      0
nature_culture                704
nature_culture_speciale      2203
nombre_lots                     0
nombre_pieces_principales       0
surface_reelle_bati             0
surface_terrain               704
dtype: int64

In [7]:
group_cols = ["id_mutation", "numero_disposition", "id_parcelle"]

agg = df.groupby(group_cols, as_index=False).agg(valeur_fonciere=("valeur_fonciere","first"))

print(agg)

     id_mutation  numero_disposition     id_parcelle  valeur_fonciere
0        2024-10                   2  01449000AC0558        190000.00
1       2024-100                   1  01321000ZA0377        127500.00
2      2024-1000                   1  01053000BC0397         69000.00
3      2024-1002                   1  01399000AC0091        545000.00
4      2024-1003                   1  013470000C0463        540000.00
5      2024-1004                   1  013850000A1144        158930.00
6      2024-1007                   1  014570000C1051        280000.00
7      2024-1009                   1  01053000AI0298         55000.00
8      2024-1011                   1  01143000AE0256        550000.00
9      2024-1012                   1  01249000AE0489        238000.00
10     2024-1013                   1  01004000AL0468        187000.00
11     2024-1014                   1  012930000X0405        280000.00
12     2024-1016                   1  01047000AD0034        365000.00
13     2024-1017    