In [4]:
import pandas as pd

# Read the daily-rental listings from the CSV file into a DataFrame.
df_daily = pd.read_csv('property_daily_data_wei.csv')

# Print the column names as a plain list, then let the notebook render
# the first rows as the cell's output.
print("Colonnes disponibles :", list(df_daily.columns))
df_daily.head()

Colonnes disponibles : ['city', 'country', 'longitude', 'latitude', 'sqm', 'total_rooms', 'max_guests', 'has_wifi', 'has_pool', 'distance_to_center', 'distance_to_sea', 'typeOfRental', 'nombre_etoiles', 'rental_rent']


Unnamed: 0,city,country,longitude,latitude,sqm,total_rooms,max_guests,has_wifi,has_pool,distance_to_center,distance_to_sea,typeOfRental,nombre_etoiles,rental_rent
0,Madrid,Spain,-3.716074,40.427803,57,2,5,0,0,5.18,12.35,DAILY,2,89115978517454048
1,Sevilla,Spain,-5.986343,37.389724,167,4,9,1,1,11.05,5.66,DAILY,1,164000917288372928
2,Madrid,Spain,-3.733324,40.43772,173,4,10,0,0,9.12,13.94,DAILY,2,163664885626190048
3,New York,USA,-74.050971,40.753246,121,4,10,1,1,13.63,15.34,DAILY,4,474564047002021888
4,Lyon,France,4.798111,45.780467,17,1,2,0,0,0.83,14.3,DAILY,5,212721272043853184


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# 1. Column definitions
# Separate the features (X) from the target (y). 'typeOfRental' is dropped
# together with the target; the rows previewed at load time all carry 'DAILY',
# so it is presumably constant in this dataset -- TODO confirm.
X = df_daily.drop(columns=['rental_rent', 'typeOfRental'])
y = df_daily['rental_rent']

# Columns grouped by the transformation they receive.
categorical_features = ['city', 'country']
# Note: max_guests, the two distances and the 0/1 flags are all scaled as numbers.
numerical_features = [
    'longitude', 'latitude', 'sqm', 'total_rooms',
    'max_guests', 'distance_to_center', 'distance_to_sea',
    'has_wifi', 'has_pool', 'nombre_etoiles'
]

# 2. Transformation pipeline
# Numeric columns are standardised; categorical columns are one-hot encoded.
# handle_unknown='ignore' lets transform() accept categories that were not
# present when the encoder was fitted instead of raising.
preprocessor_daily = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# 3. Train/test split on the RAW rows
# Splitting before fitting keeps the held-out rows out of the scaler
# statistics and the one-hot vocabulary. The partition depends only on the
# number of rows, test_size and random_state, so the same rows land in each
# subset as when the split was done on the already-transformed matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. Fit the preprocessor on the training rows only, then apply it unchanged
# to the test rows.
X_train = preprocessor_daily.fit_transform(X_train_raw)
X_test = preprocessor_daily.transform(X_test_raw)

# Full transformed matrix, kept under its original name because the message
# below reads it; it is produced by the train-fitted preprocessor.
X_processed = preprocessor_daily.transform(X)

print(f"Preprocessing Daily terminé. Nombre de features : {X_processed.shape[1]}")

Preprocessing Daily terminé. Nombre de features : 33


In [6]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# Training: a 100-tree random forest with a fixed seed; fit() returns the
# estimator itself, so construction and fitting are chained.
model_daily = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Evaluation on the held-out rows.
y_pred = model_daily.predict(X_test)

# The MAE is computed in the target's own unit and divided by 10**18 for
# display in ETH -- presumably the target is expressed in wei, as the CSV
# file name suggests; confirm against the data source.
mae_eth = mean_absolute_error(y_test, y_pred) / 10**18
r2 = r2_score(y_test, y_pred)

print(f"Score R² : {r2:.4f}")
print(f"Erreur moyenne : {mae_eth:.6f} ETH par nuit")

Score R² : 0.9662
Erreur moyenne : 0.015302 ETH par nuit


In [7]:
import joblib

# Persist both fitted objects under explicit file names. The preprocessor is
# written next to the model so both can be reloaded together later.
joblib.dump(value=model_daily, filename='model_daily.joblib')
joblib.dump(value=preprocessor_daily, filename='preprocessor_daily.joblib')

print("Modèle Daily et son Préprocesseur sauvegardés !")

Modèle Daily et son Préprocesseur sauvegardés !
