## 1235

In [1]:
import os
import pickle
import pandas as pd
from statsmodels.nonparametric.smoothers_lowess import lowess

# ------------------------------
# 1️⃣ BASE DIRECTORY – MODELS PER AREA
# ------------------------------
# Root folder (under the current user's home directory) that holds the
# per-area artifacts read by the loaders in this notebook.
BASE_DIR = os.path.join(
    os.path.expanduser("~"), "Downloads", "FLIPOSE_DATA", "Modular_Code", "new_18_areas_dt"
)

# Sub-folders of BASE_DIR: pickled models and pickled training-column files.
models_dir, trained_dir = (
    os.path.join(BASE_DIR, subfolder) for subfolder in ("dt_models", "trained_columns")
)

# ------------------------------
# 2️⃣ LOAD TRAINING COLUMNS
# ------------------------------
def load_columns(area_name_en):
    """Load the pickled training-column object stored for one area.

    The area name is normalized (underscores become spaces, surrounding
    whitespace is stripped) and then matched case-insensitively against the
    files in ``trained_dir``. Both the space-separated and the
    underscore-separated spelling of the file name are accepted.

    Args:
        area_name_en: Area name, with either spaces or underscores.

    Returns:
        Whatever object was pickled in the matching file.

    Raises:
        FileNotFoundError: If no file in ``trained_dir`` matches the area.
    """
    area_name_en = area_name_en.replace("_", " ").strip()

    # Both accepted spellings, lower-cased once so the scan is a set lookup.
    accepted_names = {
        f"trained_columns_{area_name_en}.pkl".lower(),
        f"trained_columns_{area_name_en.replace(' ', '_')}.pkl".lower(),
    }

    matched_file = next(
        (entry for entry in os.listdir(trained_dir) if entry.lower() in accepted_names),
        None,
    )
    if matched_file is None:
        raise FileNotFoundError(f"❌ model_columns file not found for area '{area_name_en}'")

    # NOTE: unpickling runs arbitrary code; trained_dir must only hold trusted files.
    with open(os.path.join(trained_dir, matched_file), "rb") as handle:
        return pickle.load(handle)

# ------------------------------
# 3️⃣ LOAD MODEL
# ------------------------------
def load_model(area_name_en):
    """Load the pickled model stored for one area.

    The area name is normalized (underscores become spaces, surrounding
    whitespace is stripped) and then matched case-insensitively against the
    files in ``models_dir``. Both the space-separated and the
    underscore-separated spelling of the file name are accepted.

    Args:
        area_name_en: Area name, with either spaces or underscores.

    Returns:
        Whatever object was pickled in the matching file.

    Raises:
        FileNotFoundError: If no file in ``models_dir`` matches the area.
    """
    area_name_en = area_name_en.replace("_", " ").strip()

    # Both accepted spellings, lower-cased once so the scan is a set lookup.
    accepted_names = {
        f"dt_model_{area_name_en}.pkl".lower(),
        f"dt_model_{area_name_en.replace(' ', '_')}.pkl".lower(),
    }

    matched_file = next(
        (entry for entry in os.listdir(models_dir) if entry.lower() in accepted_names),
        None,
    )
    if matched_file is None:
        raise FileNotFoundError(f"❌ Model file not found for area '{area_name_en}'")

    # NOTE: unpickling runs arbitrary code; models_dir must only hold trusted files.
    with open(os.path.join(models_dir, matched_file), "rb") as handle:
        return pickle.load(handle)

# ------------------------------
# 4️⃣ PREDICTION FUNCTION
# ------------------------------
def _normalized_area_names(area_series):
    """Return area names with underscores turned into spaces and whitespace stripped."""
    # Series.replace("_", " ") only swaps cells that are exactly "_"; the
    # substring replacement needed here lives on the .str accessor.
    return area_series.astype(str).str.replace("_", " ", regex=False).str.strip()


def predict_with_area(
    input_data,
    lowess_frac=0.04,
    forecast_path="Sarima_forecast_6M.csv",
    historic_path="historical_df.csv",
):
    """Build a historic + forecast median-price series for one area.

    The area's model predicts a single price from ``input_data``. That price
    is scaled by each forecast row's ``growth_factor`` to get the forecast
    series, and the area's historic prices are LOWESS-smoothed and placed in
    front of it.

    Args:
        input_data: Mapping of feature name to value. Must contain
            ``"area_name_en"``; the remaining entries are one-hot encoded and
            aligned to the area's training columns.
        lowess_frac: ``frac`` argument passed to ``lowess`` when smoothing the
            historic series.
            NOTE(review): the notebook output shows unsmoothed-looking values,
            so 0.04 may be too small to smooth ~70 rows - confirm intent.
        forecast_path: CSV with columns ``area_name_en``, ``month`` and
            ``growth_factor``.
        historic_path: CSV with columns ``area_name_en``, ``month`` and
            ``median_price``.

    Returns:
        DataFrame with columns ``month`` and ``median_price``: the area's
        historic rows (sorted by month) followed by its forecast rows.

    Raises:
        KeyError: If ``input_data`` has no ``"area_name_en"`` entry.
        FileNotFoundError: If the area has no model / training-column file, or
            a CSV path does not exist.

    Side effects:
        Prints the raw model prediction.
    """
    forecast_df = pd.read_csv(forecast_path)
    historic_df = pd.read_csv(historic_path)
    area = input_data["area_name_en"].replace("_", " ").strip()

    # Step 1: Load model + expected columns
    train_columns = load_columns(area)
    model = load_model(area)

    # Step 2: One-hot encode the input and align it to the training layout.
    # reindex adds missing columns as 0, drops extras and fixes the order.
    features = pd.DataFrame([input_data])
    features["area_name_en"] = area
    features = pd.get_dummies(features).reindex(columns=train_columns, fill_value=0)

    # Step 3: Predict median price
    predicted_price = model.predict(features)[0]
    print("Raw Model Prediction:", predicted_price)

    # Step 4: Forecast rows for the area, scaled from the predicted price
    forecast_area = forecast_df[_normalized_area_names(forecast_df["area_name_en"]) == area].copy()
    forecast_area["median_price"] = predicted_price * forecast_area["growth_factor"]
    forecast_area = forecast_area[["month", "median_price"]]

    # Step 5: Historic rows for the area
    historic_area = historic_df[_normalized_area_names(historic_df["area_name_en"]) == area].copy()
    # lowess drops NaN observations by default, which would make its output
    # shorter than the frame and break the assignment below.
    historic_area = historic_area.dropna(subset=["median_price"])

    # Step 6: Apply LOWESS smoothing on historic, using the row position as x
    if not historic_area.empty:
        historic_area = historic_area.sort_values("month").reset_index(drop=True)
        smoothed = lowess(
            endog=historic_area["median_price"].values,
            exog=historic_area.index.values,
            frac=lowess_frac,
        )
        historic_area["median_price"] = smoothed[:, 1]

        # Overwrite the last historic value with the first forecast value so
        # the two segments meet at the same price.
        if not forecast_area.empty:
            historic_area.loc[historic_area.index[-1], "median_price"] = forecast_area.iloc[0]["median_price"]

    # Step 7: Combine historic + forecast
    final_df = pd.concat(
        [historic_area[["month", "median_price"]], forecast_area],
        ignore_index=True,
    )
    return final_df.reset_index(drop=True)


In [2]:
# Area names covered by this notebook, one per line.
area_list = [
    "Al Barsha South Fifth",
    "Al Barsha South Fourth",
    "Al Barshaa South Third",
    "Al Hebiah Fourth",
    "Al Khairan First",
    "Al Merkadh",
    "Al Thanyah Fifth",
    "Al Yelayiss 2",
    "Burj Khalifa",
    "Business Bay",
    "Hadaeq Sheikh Mohammed Bin Rashid",
    "Jabal Ali First",
    "Madinat Al Mataar",
    "Madinat Dubai Almelaheyah",
    "Marsa Dubai",
    "Me'Aisem First",
    "Nadd Hessa",
    "Wadi Al Safa 5",
]

In [8]:
# Sample request: the area plus the feature values handed to the area's model.
input_data = dict(
    area_name_en='Al Barshaa South Third',
    procedure_area=80,
    has_parking=1,
    floor_bin="11-20",
    rooms_en="1BR",
    swimming_pool=1,
    balcony=1,
    elevator=1,
    metro=1,
)

# Historic + forecast price series for the sample request.
final_df = predict_with_area(input_data)

# Show the last ten rows of the combined series.
final_df.tail(10)

Raw Model Prediction: 15683.806666666665


Unnamed: 0,month,median_price
67,2025-08-01,14222.22
68,2025-09-01,14901.54
69,2025-10-01,14864.25
70,2025-11-01,15707.078432
71,2025-12-01,15707.078432
72,2026-01-01,15786.965022
73,2026-02-01,15911.23111
74,2026-03-01,16056.563555
75,2026-04-01,16197.611138
76,2026-05-01,16314.973364
