In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import time
import warnings
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Modèles
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor

warnings.filterwarnings('ignore')
os.makedirs('../output', exist_ok=True)

In [4]:
df = pd.read_csv("../data/clean/cleaned_data_filtered.csv")
print(f"\nDataset: {df.shape[0]} propriétés, {df.shape[1]} colonnes")


Dataset: 4856 propriétés, 8 colonnes


#### Feature Engineering

In [None]:
# Features de base
df['room_bathroom_ratio'] = df['room_count'] / np.maximum(df['bathroom_count'], 1)
df['total_rooms'] = df['room_count'] + df['bathroom_count']
df['size_per_room'] = df['size'] / np.maximum(df['room_count'], 1)
df['bathroom_density'] = df['bathroom_count'] / np.maximum(df['size'], 1)
df['size_x_rooms'] = df['size'] * df['room_count']
df['size_x_bathrooms'] = df['size'] * df['bathroom_count']

high_value_locations = ['tunis', 'ariana', 'ben arous', 'la manouba']
upscale_locations = ['nabeul', 'sousse', 'monastir', 'mahdia', 'bizerte']

df['is_premium_location'] = df['location'].str.lower().isin(high_value_locations).astype(int)
df['is_upscale_location'] = df['location'].str.lower().isin(upscale_locations).astype(int)

df['luxury_score'] = (
    (df['size'] / 100) * 0.3 +           
    (df['room_count'] / 5) * 0.2 +       
    (df['bathroom_count'] / 2) * 0.2 +   
    df['is_premium_location'] * 0.3 + 
    df['is_upscale_location'] * 0.15
)

df['property_tier'] = pd.cut(df['luxury_score'], 
                              bins=3, 
                              labels=['standard', 'upscale', 'luxury'])


engineered_cols = ['room_bathroom_ratio', 'total_rooms', 'size_per_room', 
                   'bathroom_density', 'size_x_rooms', 'size_x_bathrooms',
                   'luxury_score', 'is_premium_location', 'is_upscale_location']
for col in engineered_cols:
    if df[col].dtype in ['float64', 'int64']:
        df[col] = df[col].replace([np.inf, -np.inf], np.nan).fillna(df[col].median())

print("  Features créées: luxury_score, property_tier, is_premium_location, is_upscale_location")
categorical_cols = ['category', 'type', 'location', 'property_tier']

  Features créées: luxury_score, property_tier, is_premium_location, is_upscale_location


Préparer pour cross-validation avec calcul du prix médian par zone

* Note: Le prix médian sera calculé sur le train set uniquement dans la CV

In [None]:
# Fonction pour ajouter les statistiques de location
def add_location_features(df_train, df_test, df_full):
    # Calculer sur train uniquement
    location_stats = df_train.groupby('location').agg({
        'price': ['median', 'mean', 'std', 'count']
    })
    location_stats.columns = ['location_price_median', 'location_price_mean', 
                              'location_price_std', 'location_count']
    location_stats = location_stats.reset_index()
    
    # Normaliser les stats de prix (pour éviter le leakage direct)
    location_stats['location_price_level'] = (
        location_stats['location_price_median'] / location_stats['location_price_median'].median()
    )
    
    # Merger sur train et test
    df_train_merged = df_train.merge(location_stats[['location', 'location_price_level']], 
                                     on='location', how='left')
    df_test_merged = df_test.merge(location_stats[['location', 'location_price_level']], 
                                   on='location', how='left')
    
    # Remplir les valeurs manquantes (nouvelles locations) avec la médiane
    global_median = location_stats['location_price_level'].median()
    df_train_merged['location_price_level'] = df_train_merged['location_price_level'].fillna(global_median)
    df_test_merged['location_price_level'] = df_test_merged['location_price_level'].fillna(global_median)
    
    return df_train_merged, df_test_merged, location_stats

In [None]:
# Configuration des modèles avec leurs grilles
models_config = {
    "Decision Tree": {
        "model": DecisionTreeRegressor(random_state=42),
        "params": {
            'max_depth': [10, 15, 20, 25],
            'min_samples_split': [10, 20, 30],
            'min_samples_leaf': [5, 10, 15]
        }
    },
    "Random Forest": {
        "model": RandomForestRegressor(random_state=42, n_jobs=-1),
        "params": {
            'n_estimators': [100, 200, 300],
            'max_depth': [15, 20, 25],
            'min_samples_split': [5, 10, 15],
            'min_samples_leaf': [2, 4, 6],
            'max_features': ['sqrt', 'log2']
        }
    },
    "Gradient Boosting": {
        "model": GradientBoostingRegressor(random_state=42),
        "params": {
            'n_estimators': [100, 200, 300],
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [4, 6, 8],
            'min_samples_split': [5, 10, 15],
            'subsample': [0.8, 0.9, 1.0],
            'max_features': ['sqrt', 'log2']
        }
    },
    "XGBoost": {
        "model": XGBRegressor(random_state=42, n_jobs=-1, objective='reg:squarederror'),
        "params": {
            'n_estimators': [100, 200, 300],
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [4, 6, 8],
            'min_child_weight': [1, 3, 5],
            'subsample': [0.8, 0.9, 1.0],
            'colsample_bytree': [0.8, 0.9, 1.0]
        }
    },
    "MLP": {
        "model": MLPRegressor(random_state=42, max_iter=500, early_stopping=True),
        "params": {
            'hidden_layer_sizes': [(64,), (128, 64), (128, 64, 32)],
            'activation': ['relu', 'tanh'],
            'alpha': [0.0001, 0.001, 0.01],
            'learning_rate_init': [0.001, 0.01]
        }
    }
}

#### Optimisation des hyperparams

In [8]:
# Optimiser chaque modèle
optimized_models = {}
optimization_results = []

for name, config in models_config.items():
    print(f"\n{'='*80}")
    print(f"Optimisation: {name}")
    print(f"{'='*80}")
    
    n_combinations = np.prod([len(v) for v in config['params'].values()])
    print(f"Nombre de combinaisons à tester: {n_combinations}")
    
    start_time = time.time()
    
    # Custom CV pour ajouter location_price_level sans leakage
    kf_opt = KFold(n_splits=3, shuffle=True, random_state=42)
    best_score = -np.inf
    best_params = None
    best_estimator = None
    
    # Grid Search manuel pour contrôler le preprocessing
    from sklearn.model_selection import ParameterGrid
    param_grid = list(ParameterGrid(config['params']))
    
    print(f"Testing {len(param_grid)} combinations...")
    
    for i, params in enumerate(param_grid):
        if i % 10 == 0:
            print(f"  Progress: {i}/{len(param_grid)}...", end='\r')
        
        scores = []
        for train_idx, test_idx in kf_opt.split(df):
            # Split data
            df_train_fold = df.iloc[train_idx].copy()
            df_test_fold = df.iloc[test_idx].copy()
            
            # Add location features (calculated on train only)
            df_train_fold, df_test_fold, _ = add_location_features(
                df_train_fold, df_test_fold, df
            )
            
            # One-hot encoding
            df_train_encoded = pd.get_dummies(df_train_fold, columns=categorical_cols, drop_first=False)
            df_test_encoded = pd.get_dummies(df_test_fold, columns=categorical_cols, drop_first=False)
            
            # Align columns
            missing_cols = set(df_train_encoded.columns) - set(df_test_encoded.columns)
            for col in missing_cols:
                df_test_encoded[col] = 0
            df_test_encoded = df_test_encoded[df_train_encoded.columns]
            
            # Features and target
            feature_cols_fold = [col for col in df_train_encoded.columns 
                                if col not in ['price', 'log_price']]
            X_train_fold = df_train_encoded[feature_cols_fold]
            y_train_fold = df_train_encoded['log_price']
            X_test_fold = df_test_encoded[feature_cols_fold]
            y_test_fold = df_test_encoded['log_price']
            
            # Scale
            scaler_fold = StandardScaler()
            num_cols_fold = ['room_count', 'bathroom_count', 'size', 'room_bathroom_ratio', 
                            'total_rooms', 'size_per_room', 'bathroom_density',
                            'size_x_rooms', 'size_x_bathrooms', 'luxury_score',
                            'is_premium_location', 'location_price_level', 'is_upscale_location']
            num_cols_present = [col for col in num_cols_fold if col in X_train_fold.columns]
            X_train_fold[num_cols_present] = scaler_fold.fit_transform(X_train_fold[num_cols_present])
            X_test_fold[num_cols_present] = scaler_fold.transform(X_test_fold[num_cols_present])
            
            # Train and score
            model_fold = config['model'].__class__(**params, random_state=42)
            if hasattr(model_fold, 'n_jobs'):
                model_fold.n_jobs = -1
            model_fold.fit(X_train_fold, y_train_fold)
            score = model_fold.score(X_test_fold, y_test_fold)
            scores.append(score)
        
        avg_score = np.mean(scores)
        if avg_score > best_score:
            best_score = avg_score
            best_params = params
            best_estimator = config['model'].__class__(**params, random_state=42)
            if hasattr(best_estimator, 'n_jobs'):
                best_estimator.n_jobs = -1
    
    elapsed = time.time() - start_time
    
    print(f"\n\tTerminé en {elapsed:.1f}s")
    print(f"Meilleur score R²: {best_score:.4f}")
    print(f"Meilleurs paramètres:")
    for param, value in best_params.items():
        print(f"  * {param}: {value}")
    
    optimized_models[name] = best_estimator
    
    optimization_results.append({
        'Model': name,
        'Best_R2': best_score,
        'Best_Params': best_params,
        'Time_seconds': elapsed
    })


Optimisation: Decision Tree
Nombre de combinaisons à tester: 36
Testing 36 combinations...
  Progress: 30/36...
	Terminé en 3.1s
Meilleur score R²: 0.9330
Meilleurs paramètres:
  * max_depth: 10
  * min_samples_leaf: 15
  * min_samples_split: 10

Optimisation: Random Forest
Nombre de combinaisons à tester: 162
Testing 162 combinations...
  Progress: 160/162...
	Terminé en 132.3s
Meilleur score R²: 0.9347
Meilleurs paramètres:
  * max_depth: 15
  * max_features: sqrt
  * min_samples_leaf: 2
  * min_samples_split: 10
  * n_estimators: 300

Optimisation: Gradient Boosting
Nombre de combinaisons à tester: 486
Testing 486 combinations...
  Progress: 480/486...
	Terminé en 550.3s
Meilleur score R²: 0.9375
Meilleurs paramètres:
  * learning_rate: 0.05
  * max_depth: 4
  * max_features: log2
  * min_samples_split: 10
  * n_estimators: 200
  * subsample: 0.9

Optimisation: XGBoost
Nombre de combinaisons à tester: 729
Testing 729 combinations...
  Progress: 720/729...
	Terminé en 523.5s
Meilleu

#### Evaluation finale des modèles optimisés

In [9]:
# Calculer location_price_level sur l'ensemble complet (pour training final)
location_stats_full = df.groupby('location')['price'].median().reset_index()
location_stats_full['location_price_level'] = (
    location_stats_full['price'] / location_stats_full['price'].median()
)
df_final = df.merge(location_stats_full[['location', 'location_price_level']], 
                   on='location', how='left')
df_final['location_price_level'] = df_final['location_price_level'].fillna(1.0)

In [10]:
# One-hot encoding
df_encoded = pd.get_dummies(df_final, columns=categorical_cols, drop_first=False)

In [11]:
feature_cols = [col for col in df_encoded.columns if col not in ['price', 'log_price']]
X = df_encoded[feature_cols].copy()
y = df_encoded['log_price'].copy()

num_cols = ['room_count', 'bathroom_count', 'size', 'room_bathroom_ratio', 
            'total_rooms', 'size_per_room', 'bathroom_density',
            'size_x_rooms', 'size_x_bathrooms', 'luxury_score',
            'is_premium_location', 'is_upscale_location' ,'location_price_level']

In [12]:
scaler = StandardScaler()
X[num_cols] = scaler.fit_transform(X[num_cols])

print(f"Features finales: {X.shape[1]} colonnes")
print(f"Target: log10(price)")

Features finales: 47 colonnes
Target: log10(price)


In [13]:
# Évaluation avec 5-fold CV
kf = KFold(n_splits=5, shuffle=True, random_state=42)
final_results = {}

for name, model in optimized_models.items():
    print(f"\nÉvaluation finale: {name}...")
    
    # Cross-validation scores
    r2_scores = cross_val_score(model, X, y, cv=kf, scoring='r2')
    
    # Prédictions pour métriques détaillées
    y_pred_log = np.zeros_like(y)
    for train_idx, test_idx in kf.split(X):
        model.fit(X.iloc[train_idx], y.iloc[train_idx])
        y_pred_log[test_idx] = model.predict(X.iloc[test_idx])
    
    # Métriques log-space
    rmse_log = np.sqrt(mean_squared_error(y, y_pred_log))
    mae_log = mean_absolute_error(y, y_pred_log)
    r2_log = r2_score(y, y_pred_log)
    
    # Back-transform avec bias correction
    y_actual = 10 ** y
    y_pred_raw = 10 ** y_pred_log
    
    df_temp = pd.DataFrame({
        'actual_log': y.values,
        'pred_log': y_pred_log,
        'type': df['type'].values
    })
    
    bias_factors = {}
    for prop_type in df_temp['type'].unique():
        mask = df_temp['type'] == prop_type
        mean_actual = np.mean(df_temp.loc[mask, 'actual_log'])
        mean_pred = np.mean(df_temp.loc[mask, 'pred_log'])
        bias_factors[prop_type] = 10 ** (mean_actual - mean_pred)
    
    y_pred_corrected = np.array([
        y_pred_raw[i] * bias_factors[df['type'].iloc[i]] 
        for i in range(len(y_pred_raw))
    ])
    
    # Métriques espace réel
    rmse_actual = np.sqrt(mean_squared_error(y_actual, y_pred_corrected))
    mae_actual = mean_absolute_error(y_actual, y_pred_corrected)
    mape = np.mean(np.abs((y_actual - y_pred_corrected) / y_actual) * 100)
    
    final_results[name] = {
        'R2_log': r2_log,
        'R2_std': r2_scores.std(),
        'RMSE_log': rmse_log,
        'MAE_log': mae_log,
        'RMSE_actual': rmse_actual,
        'MAE_actual': mae_actual,
        'MAPE': mape
    }


Évaluation finale: Decision Tree...

Évaluation finale: Random Forest...

Évaluation finale: Gradient Boosting...

Évaluation finale: XGBoost...

Évaluation finale: MLP...


In [14]:
# Créer tableau récapitulatif
results_df = pd.DataFrame(final_results).T
results_df = results_df.sort_values('R2_log', ascending=False)

print("\n" + "=" * 100)
print("TABLEAU RÉCAPITULATIF DES PERFORMANCES FINALES")
print("=" * 100)
print("\n", results_df.round(4).to_string())

# Identifier le meilleur
best_model_name = results_df['R2_log'].idxmax()
best_model = optimized_models[best_model_name]

print("\n" + "=" * 100)
print(f"\nLe meilleur modèle est: {best_model_name}")
print(f"  • R² = {results_df.loc[best_model_name, 'R2_log']:.4f}")
print(f"  • MAE = {results_df.loc[best_model_name, 'MAE_actual']:,.0f} TND")
print(f"  • MAPE = {results_df.loc[best_model_name, 'MAPE']:.2f}%")


TABLEAU RÉCAPITULATIF DES PERFORMANCES FINALES

                    R2_log  R2_std  RMSE_log  MAE_log  RMSE_actual   MAE_actual     MAPE
XGBoost            0.9386  0.0087    0.3087   0.2060  518679.9115  159064.8834  71.6293
Gradient Boosting  0.9385  0.0079    0.3089   0.2063  516952.2033  159718.7435  71.6776
MLP                0.9363  0.0081    0.3143   0.2123  519657.7988  160978.2078  72.8219
Random Forest      0.9360  0.0081    0.3152   0.2126  518572.2429  160643.3171  73.5146
Decision Tree      0.9344  0.0079    0.3190   0.2193  521154.1910  167488.2018  75.4636


Le meilleur modèle est: XGBoost
  • R² = 0.9386
  • MAE = 159,065 TND
  • MAPE = 71.63%


In [23]:
# Entraîner le modèle final sur toutes les données
print("\nEntraînement du modèle final sur toutes les données...")
best_model.fit(X, y)


Entraînement du modèle final sur toutes les données...


0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.9
,device,
,early_stopping_rounds,
,enable_categorical,False


In [24]:
# Sauvegarder le modèle et les objets nécessaires
joblib.dump(best_model, '../output/best_model.pkl')
joblib.dump(scaler, '../output/scaler.pkl')
joblib.dump(feature_cols, '../output/feature_cols.pkl')
joblib.dump(location_stats_full, '../output/location_stats.pkl')
joblib.dump(high_value_locations, '../output/premium_locations.pkl')
joblib.dump(upscale_locations, '../output/upscale_locations.pkl')

['../output/upscale_locations.pkl']

#### Interface de prédiction

In [25]:
# Charger le modèle et les objets nécessaires
model = joblib.load('../output/best_model.pkl')
scaler = joblib.load('../output/scaler.pkl')
feature_cols = joblib.load('../output/feature_cols.pkl')
location_stats = joblib.load('../output/location_stats.pkl')
high_value_locations = joblib.load('../output/premium_locations.pkl')
upscale_locations = joblib.load('../output/upscale_locations.pkl')

In [26]:
# Listes des options
VILLES = [
    'ariana', 'mahdia', 'sousse', 'tunis', 'nabeul', 'ben arous',
    'zaghouan', 'la manouba', 'bizerte', 'sfax', 'monastir',
    'médenine', 'gabès', 'gafsa', 'béja', 'kasserine', 'kairouan',
    'tozeur', 'jendouba', 'le kef', 'sidi bouzid', 'siliana',
    'tataouine'
]

CATEGORIES = [
    'Appartements', 'Terrains et Fermes', 'Bureaux et Plateaux',
    'Maisons et Villas', 'Locations de vacances', 'Colocations'
]

TYPES_TRANSACTION = ['À Vendre', 'À Louer']

In [27]:
def engineer_features(data, location):
    # Features de base
    data['room_bathroom_ratio'] = data['room_count'] / np.maximum(data['bathroom_count'], 1)
    data['total_rooms'] = data['room_count'] + data['bathroom_count']
    data['size_per_room'] = data['size'] / np.maximum(data['room_count'], 1)
    data['bathroom_density'] = data['bathroom_count'] / np.maximum(data['size'], 1)
    data['size_x_rooms'] = data['size'] * data['room_count']
    data['size_x_bathrooms'] = data['size'] * data['bathroom_count']
    
    # Premium location indicator
    data['is_premium_location'] = (location in high_value_locations) * 1
    data['is_upscale_location'] = (location in upscale_locations) * 1

    
    # Luxury score
    data['luxury_score'] = (
        (df['size'] / 100) * 0.3 +           
        (df['room_count'] / 5) * 0.2 +       
        (df['bathroom_count'] / 2) * 0.2 +   
        df['is_premium_location'] * 0.3 + 
        df['is_upscale_location'] * 0.15
    )
    
    # Property tier basé sur luxury_score
    if data['luxury_score'].values[0] < 0.5:
        property_tier = 'standard'
    elif data['luxury_score'].values[0] < 1.0:
        property_tier = 'upscale'
    else:
        property_tier = 'luxury'
    
    data['property_tier'] = property_tier
    
    # Location price level (calculé sur train set, stocké dans location_stats)
    location_price_level = location_stats[
        location_stats['location'] == location
    ]['location_price_level'].values
    
    if len(location_price_level) > 0:
        data['location_price_level'] = location_price_level[0]
    else:
        # Ville inconnue -> utiliser la médiane
        data['location_price_level'] = 1.0
    
    # Gestion des infinis
    for col in ['room_bathroom_ratio', 'total_rooms', 'size_per_room', 
                'bathroom_density', 'size_x_rooms', 'size_x_bathrooms', 'luxury_score']:
        data[col] = data[col].replace([np.inf, -np.inf], np.nan).fillna(data[col].median())
    
    return data

In [28]:
def prepare_input_for_prediction(room_count, bathroom_count, size, location, 
                                  category, transaction_type):
    # Créer DataFrame de base
    input_data = pd.DataFrame({
        'room_count': [room_count],
        'bathroom_count': [bathroom_count],
        'size': [size],
        'category': [category],
        'type': [transaction_type],
        'location': [location]
    })
    
    # Feature engineering
    input_data = engineer_features(input_data, location)
    
    # One-hot encoding
    input_encoded = pd.get_dummies(input_data, 
                                    columns=['category', 'type', 'location', 'property_tier'])
    
    # Créer un DataFrame avec toutes les features attendues
    X_pred = pd.DataFrame(0, index=[0], columns=feature_cols)
    
    # Remplir les colonnes présentes
    for col in input_encoded.columns:
        if col in X_pred.columns:
            X_pred[col] = input_encoded[col].values
    
    # Standardiser les features numériques
    num_cols = ['room_count', 'bathroom_count', 'size', 'room_bathroom_ratio', 
                'total_rooms', 'size_per_room', 'bathroom_density',
                'size_x_rooms', 'size_x_bathrooms', 'luxury_score',
                'is_premium_location','is_upscale_location' ,'location_price_level']
    
    X_pred[num_cols] = scaler.transform(X_pred[num_cols])
    
    return X_pred, transaction_type

In [29]:
def predict_price(room_count, bathroom_count, size, location, category, transaction_type):    
    # Préparer les données
    X_pred, prop_type = prepare_input_for_prediction(
        room_count, bathroom_count, size, location, category, transaction_type
    )
    
    # Prédiction en log-space
    log_price_pred = model.predict(X_pred)[0]
    
    # Back-transform
    price_pred = 10 ** log_price_pred
    
    # Bias correction basé sur le type (calculé lors de l'entraînement)
    bias_factors = {'À Vendre': 1.0022, 'À Louer': 0.9935}
    price_pred = price_pred * bias_factors.get(prop_type, 1.0)
    
    # Intervalle de confiance approximatif (±20% pour l'immobilier)
    conf_low = price_pred * 0.8
    conf_high = price_pred * 1.2
    
    return price_pred, conf_low, conf_high

In [32]:
print("EXEMPLES DE PRÉDICTIONS")

examples = [
    {
        "room_count": 3,
        "bathroom_count": 2,
        "size": 120,
        "location": "tunis",
        "category": "Appartements",
        "transaction_type": "À Louer"
    },
    {
        "room_count": 5,
        "bathroom_count": 3,
        "size": 300,
        "location": "ariana",
        "category": "Villas",
        "transaction_type": "À Vendre"
    },
    {
        "room_count": 1,
        "bathroom_count": 1,
        "size": 35,
        "location": "sousse",
        "category": "Studios",
        "transaction_type": "À Louer"
    },
    {
        "room_count": 4,
        "bathroom_count": 2,
        "size": 200,
        "location": "sfax",
        "category": "Maisons",
        "transaction_type": "À Vendre"
    }
]

for i, example in enumerate(examples, 1):
    print(f"\n{'─' * 80}")
    print(f"Exemple {i}")
    print(f"{'─' * 80}")
    print(f"Caractéristiques:")
    print(f"  * Chambres: {example['room_count']}")
    print(f"  * Salles de bain: {example['bathroom_count']}")
    print(f"  * Surface: {example['size']} m²")
    print(f"  * Ville: {example['location']}")
    print(f"  * Catégorie: {example['category']}")
    print(f"  * Type: {example['transaction_type']}")
    
    price, conf_low, conf_high = predict_price(
        example['room_count'],
        example['bathroom_count'],
        example['size'],
        example['location'],
        example['category'],
        example['transaction_type']
    )
    
    print(f"\nPRÉDICTION:")
    if example['transaction_type'] == 'À Louer':
        print(f"  Prix estimé: {price:,.0f} TND/mois")
        print(f"  Intervalle de confiance: {conf_low:,.0f} - {conf_high:,.0f} TND/mois")
    else:
        print(f"  Prix estimé: {price:,.0f} TND")
        print(f"  Intervalle de confiance: {conf_low:,.0f} - {conf_high:,.0f} TND")

EXEMPLES DE PRÉDICTIONS

────────────────────────────────────────────────────────────────────────────────
Exemple 1
────────────────────────────────────────────────────────────────────────────────
Caractéristiques:
  * Chambres: 3
  * Salles de bain: 2
  * Surface: 120 m²
  * Ville: tunis
  * Catégorie: Appartements
  * Type: À Louer

PRÉDICTION:
  Prix estimé: 1,563 TND/mois
  Intervalle de confiance: 1,250 - 1,875 TND/mois

────────────────────────────────────────────────────────────────────────────────
Exemple 2
────────────────────────────────────────────────────────────────────────────────
Caractéristiques:
  * Chambres: 5
  * Salles de bain: 3
  * Surface: 300 m²
  * Ville: ariana
  * Catégorie: Villas
  * Type: À Vendre

PRÉDICTION:
  Prix estimé: 628,208 TND
  Intervalle de confiance: 502,566 - 753,850 TND

────────────────────────────────────────────────────────────────────────────────
Exemple 3
────────────────────────────────────────────────────────────────────────────────
C

In [31]:
print("INTERFACE DE PRÉDICTION INTERACTIVE")

def interactive_prediction():
    print("\nEntrez les caractéristiques de la propriété:")
    print("(Appuyez sur Ctrl+C pour quitter)\n")
    
    while True:
        try:
            # Collecte des inputs
            print("─" * 80)
            room_count = int(input("Nombre de chambres: "))
            bathroom_count = int(input("Salles de bain: "))
            size = float(input("Surface (m²): "))
            
            print(f"\nVilles disponibles: {', '.join(VILLES[:10])}... (et autres)")
            location = input("Ville: ").strip()
            if location not in VILLES:
                print(f"Ville inconnue. Utilisation de 'Tunis' par défaut.")
                location = 'Tunis'
            
            print(f"\nCatégories: {', '.join(CATEGORIES[:5])}... (et autres)")
            category = input("Catégorie: ").strip()
            if category not in CATEGORIES:
                print(f"Catégorie inconnue. Utilisation de 'Appartements' par défaut.")
                category = 'Appartements'
            
            print(f"\nType de transaction: {', '.join(TYPES_TRANSACTION)}")
            transaction_type = input("Type: ").strip()
            if transaction_type not in TYPES_TRANSACTION:
                print(f"Type inconnu. Utilisation de 'À Vendre' par défaut.")
                transaction_type = 'À Vendre'
            
            # Prédiction
            print("\nCalcul de la prédiction...\n")
            price, conf_low, conf_high = predict_price(
                room_count, bathroom_count, size, location, category, transaction_type
            )
            
            print("=" * 80)
            print("RÉSULTAT DE LA PRÉDICTION")
            print("=" * 80)
            
            if transaction_type == 'À Louer':
                print(f"\n  Prix estimé: {price:,.0f} TND/mois")
                print(f"  Intervalle 80%: {conf_low:,.0f} - {conf_high:,.0f} TND/mois")
            else:
                print(f"\n  Prix estimé: {price:,.0f} TND")
                print(f"  Intervalle 80%: {conf_low:,.0f} - {conf_high:,.0f} TND")
            
            print("\n" + "=" * 80)
            
            # Demander si continuer
            continue_input = input("\nFaire une autre prédiction? (o/n): ").strip().lower()
            if continue_input != 'o':
                break
                
        except KeyboardInterrupt:
            print("\n\nAu revoir!")
            break
        except ValueError as e:
            print(f"\nErreur: Entrée invalide. Veuillez entrer des nombres valides.")
        except Exception as e:
            print(f"\nErreur: {str(e)}")

# interactive_prediction()

INTERFACE DE PRÉDICTION INTERACTIVE
