In [30]:
import os
import pickle
import pandas as pd
from statsmodels.nonparametric.smoothers_lowess import lowess

# ============================================================
# 1️⃣ BASE DIRECTORY – MODELS PER AREA
# ============================================================

BASE_DIR = os.path.join(
    os.path.expanduser("~"),
    "Downloads",
    "FLIPOSE_DATA",
    "Modular_Code",
    "Modular_code with 18 areas_RF"
)

print("Using Base Directory:\n", BASE_DIR)
print("\nFiles in directory:", os.listdir(BASE_DIR))


# ============================================================
# 2️⃣ LOAD TRAINING COLUMNS (per area)
# ============================================================

def load_columns(area_name_en):
    """
    Load the expected training columns for a given area.
    Case-insensitive and handles spaces/underscores in names.
    """
    # Convert underscores → spaces for consistency
    area_name_en = area_name_en.replace("_", " ").strip()

    # Build expected filename pattern
    safe = area_name_en.replace(" ", "_")
    expected = f"model_columns_{safe}.pkl"

    # Search ignoring case
    for f in os.listdir(BASE_DIR):
        if f.lower() == expected.lower():
            with open(os.path.join(BASE_DIR, f), "rb") as file:
                return pickle.load(file)

    raise FileNotFoundError(
        f"❌ model_columns file not found for area '{area_name_en}'\n"
        f"Expected filename (case-insensitive): {expected}\n"
        f"Available files: {os.listdir(BASE_DIR)}"
    )


# ============================================================
# 3️⃣ LOAD MODEL (per area)
# ============================================================

def load_model(area_name_en):
    """
    Load the trained model for a given area.
    Case-insensitive and handles spaces/underscores in names.
    """
    area_name_en = area_name_en.replace("_", " ").strip()

    safe = area_name_en.replace(" ", "_")
    expected = f"rf_model_{safe}.pkl"

    for f in os.listdir(BASE_DIR):
        if f.lower() == expected.lower():
            with open(os.path.join(BASE_DIR, f), "rb") as file:
                return pickle.load(file)

    raise FileNotFoundError(
        f"❌ Model file not found for area '{area_name_en}'\n"
        f"Expected filename (case-insensitive): {expected}\n"
        f"Available files: {os.listdir(BASE_DIR)}"
    )


# ============================================================
# 4️⃣ MAIN PREDICTION FUNCTION
# ============================================================

def predict_with_area(input_data, forecast_df, historic_df):
    area = input_data["area_name_en"].replace("_", " ").strip()

    # Step 1: Load model + expected columns
    train_columns = load_columns(area)
    model = load_model(area)

    # Step 2: One-hot encode input
    temp = pd.DataFrame([input_data])
    temp["area_name_en"] = area  # ensure correct area
    temp = pd.get_dummies(temp)

    # Add missing training columns
    for col in train_columns:
        if col not in temp.columns:
            temp[col] = 0

    temp = temp[train_columns]

    # Step 3: Predict median price
    predicted_price = model.predict(temp)[0]
    print("Raw Model Prediction:", predicted_price)

    # Step 4: Clean forecast_df column names
    if "year_month" in forecast_df.columns:
        forecast_df = forecast_df.rename(columns={"year_month": "month"})

    gf = forecast_df[forecast_df["area_name_en"] == area].copy()
    gf["median_price"] = predicted_price * gf["growth_factor"]

    # Step 5: Clean historic_df column names
    if "year_month" in historic_df.columns:
        historic_df = historic_df.rename(columns={"year_month": "month"})
    if "meter_sale_price" in historic_df.columns:
        historic_df = historic_df.rename(columns={"meter_sale_price": "median_price"})

    # Step 6: Filter historic for specific area
    if historic_df.empty or "area_name_en" not in historic_df.columns:
        print(f"⚠️ No historic data found for {area}.")
        historic = pd.DataFrame()
    else:
        historic = historic_df[historic_df["area_name_en"] == area].copy()

    # Step 7: LOWESS smoothing
    if not historic.empty:
        historic = historic.sort_values("month")
        x = historic["month"].astype("int64") / 10**9

        smoothed = lowess(
            endog=historic["median_price"],
            exog=x,
            frac=0.03
        )

        historic["median_price"] = smoothed[:, 1]
        historic.loc[historic.index[-1], "median_price"] = predicted_price

    # Step 8: Clean & combine outputs
    for df_ in [historic, gf]:
        if "area_name_en" in df_.columns:
            df_.drop(columns=["area_name_en"], inplace=True)

    final_df = pd.concat([historic, gf], ignore_index=True)
    final_df = final_df[["month", "median_price"]]
    final_df = final_df.sort_values("month").reset_index(drop=True)

    return final_df


Using Base Directory:
 C:\Users\pooja\Downloads\FLIPOSE_DATA\Modular_Code\Modular_code with 18 areas_RF

Files in directory: ['.ipynb_checkpoints', 'historical_data.csv', 'historical_df.csv', 'model_columns_Al Barsha South Fifth.pkl', 'model_columns_Al Barsha South Fourth.pkl', 'model_columns_Al Barshaa South Third.pkl', 'model_columns_Al Hebiah Fourth.pkl', 'model_columns_Al Khairan First.pkl', 'model_columns_Al Merkadh.pkl', 'model_columns_Al Thanyah Fifth.pkl', 'model_columns_Al Yelayiss 2.pkl', 'model_columns_Burj Khalifa.pkl', 'model_columns_Business Bay.pkl', 'model_columns_Hadaeq Sheikh Mohammed Bin Rashid.pkl', 'model_columns_Jabal Ali First.pkl', 'model_columns_Madinat Al Mataar.pkl', 'model_columns_Madinat Dubai Almelaheyah.pkl', 'model_columns_Marsa Dubai.pkl', "model_columns_Me'Aisem First.pkl", 'model_columns_Nadd Hessa.pkl', 'model_columns_Wadi Al Safa 5.pkl', 'Price_predictor.ipynb', 'rf_model_Al Barsha South Fifth.pkl', 'rf_model_Al Merkadh.pkl', 'rf_model_Madinat Dubai

In [33]:
# ============================================================
# 5. USER INPUT TEST
# ============================================================

forecast_df = pd.read_csv("Sarima_forecast_6M.csv")
historic_df = pd.read_csv("historical_df.csv")

input_data = {
    "area_name_en": "Al_Barsha_South_Fifth",
    "procedure_area": 100,
    "has_parking": 1,
    "floor_bin": "41-50",
    "rooms_en": "1BR",
    "swimming_pool": 1,
    "balcony": 1,
    "elevator": 1
}

output = predict_with_area(input_data, forecast_df, historic_df)
print(output)

FileNotFoundError: ❌ model_columns file not found for area 'Al Barsha South Fifth'
Expected filename (case-insensitive): model_columns_Al_Barsha_South_Fifth.pkl
Available files: ['.ipynb_checkpoints', 'historical_data.csv', 'historical_df.csv', 'model_columns_Al Barsha South Fifth.pkl', 'model_columns_Al Barsha South Fourth.pkl', 'model_columns_Al Barshaa South Third.pkl', 'model_columns_Al Hebiah Fourth.pkl', 'model_columns_Al Khairan First.pkl', 'model_columns_Al Merkadh.pkl', 'model_columns_Al Thanyah Fifth.pkl', 'model_columns_Al Yelayiss 2.pkl', 'model_columns_Burj Khalifa.pkl', 'model_columns_Business Bay.pkl', 'model_columns_Hadaeq Sheikh Mohammed Bin Rashid.pkl', 'model_columns_Jabal Ali First.pkl', 'model_columns_Madinat Al Mataar.pkl', 'model_columns_Madinat Dubai Almelaheyah.pkl', 'model_columns_Marsa Dubai.pkl', "model_columns_Me'Aisem First.pkl", 'model_columns_Nadd Hessa.pkl', 'model_columns_Wadi Al Safa 5.pkl', 'Price_predictor.ipynb', 'rf_model_Al Barsha South Fifth.pkl', 'rf_model_Al Merkadh.pkl', 'rf_model_Madinat Dubai Almelaheyah.pkl', 'Sarima_forecast_6M.csv']

In [39]:
import os
import pickle
import pandas as pd
from statsmodels.nonparametric.smoothers_lowess import lowess

# ============================================================
# 1️⃣ BASE DIRECTORY – MODELS PER AREA
# ============================================================

BASE_DIR = os.path.join(
    os.path.expanduser("~"),
    "Downloads",
    "FLIPOSE_DATA",
    "Modular_Code",
    "Modular_code with 18 areas_RF"
)

print("Using Base Directory:\n", BASE_DIR)
print("\nFiles in directory:", os.listdir(BASE_DIR))


# ============================================================
# 2️⃣ LOAD TRAINING COLUMNS (per area)
# ============================================================

def load_columns(area_name_en):
    """
    Load the expected training columns for a given area.
    Handles spaces/underscores and is case-insensitive.
    """
    area_name_en = area_name_en.replace("_", " ").strip()
    
    for f in os.listdir(BASE_DIR):
        if f.lower() == f"model_columns_{area_name_en}.pkl".lower() or \
           f.lower() == f"model_columns_{area_name_en.replace(' ', '_')}.pkl".lower():
            with open(os.path.join(BASE_DIR, f), "rb") as file:
                return pickle.load(file)
    
    raise FileNotFoundError(
        f"❌ model_columns file not found for area '{area_name_en}'\n"
        f"Available files: {os.listdir(BASE_DIR)}"
    )


# ============================================================
# 3️⃣ LOAD MODEL (per area)
# ============================================================

def load_model(area_name_en):
    """
    Load the trained model for a given area.
    Handles spaces/underscores and is case-insensitive.
    """
    area_name_en = area_name_en.replace("_", " ").strip()
    
    for f in os.listdir(BASE_DIR):
        if f.lower() == f"rf_model_{area_name_en}.pkl".lower() or \
           f.lower() == f"rf_model_{area_name_en.replace(' ', '_')}.pkl".lower():
            with open(os.path.join(BASE_DIR, f), "rb") as file:
                return pickle.load(file)
    
    raise FileNotFoundError(
        f"❌ Model file not found for area '{area_name_en}'\n"
        f"Available files: {os.listdir(BASE_DIR)}"
    )


# ============================================================
# 4️⃣ MAIN PREDICTION FUNCTION
# ============================================================

def predict_with_area(input_data, forecast_df, historic_df):
    area = input_data["area_name_en"].replace("_", " ").strip()

    # Step 1: Load model + expected columns
    train_columns = load_columns(area)
    model = load_model(area)

    # Step 2: One-hot encode input
    temp = pd.DataFrame([input_data])
    temp["area_name_en"] = area  # ensure correct area
    temp = pd.get_dummies(temp)

    # Add missing training columns
    for col in train_columns:
        if col not in temp.columns:
            temp[col] = 0

    temp = temp[train_columns]

    # Step 3: Predict median price
    predicted_price = model.predict(temp)[0]
    print("Raw Model Prediction:", predicted_price)

    # Step 4: Clean forecast_df column names
    #if "year_month" in forecast_df.columns:
        #forecast_df = forecast_df.rename(columns={"year_month": "month"})

    gf = forecast_df[forecast_df["area_name_en"].replace("_", " ") == area].copy()
    gf["median_price"] = predicted_price * gf["growth_factor"]

    # Step 5: Clean historic_df column names
    if "year_month" in historic_df.columns:
        historic_df = historic_df.rename(columns={"year_month": "month"})
    if "meter_sale_price" in historic_df.columns:
        historic_df = historic_df.rename(columns={"meter_sale_price": "median_price"})

    # Step 6: Filter historic for specific area
    if historic_df.empty or "area_name_en" not in historic_df.columns:
        print(f"⚠️ No historic data found for {area}.")
        historic = pd.DataFrame()
    else:
        historic = historic_df[historic_df["area_name_en"].replace("_", " ") == area].copy()

    # Step 7: LOWESS smoothing
    if not historic.empty:
        historic = historic.sort_values("month")
        x = historic["month"].astype("int64") / 10**9

        smoothed = lowess(
            endog=historic["median_price"],
            exog=x,
            frac=0.03
        )

        historic["median_price"] = smoothed[:, 1]
        historic.loc[historic.index[-1], "median_price"] = predicted_price

    # Step 8: Clean & combine outputs
    for df_ in [historic, gf]:
        if "area_name_en" in df_.columns:
            df_.drop(columns=["area_name_en"], inplace=True)

    final_df = pd.concat([historic, gf], ignore_index=True)
    final_df = final_df[["month", "median_price"]]
    final_df = final_df.sort_values("month").reset_index(drop=True)

    return final_df




Using Base Directory:
 C:\Users\pooja\Downloads\FLIPOSE_DATA\Modular_Code\Modular_code with 18 areas_RF

Files in directory: ['.ipynb_checkpoints', 'historical_data.csv', 'historical_df.csv', 'model_columns_Al Barsha South Fifth.pkl', 'model_columns_Al Barsha South Fourth.pkl', 'model_columns_Al Barshaa South Third.pkl', 'model_columns_Al Hebiah Fourth.pkl', 'model_columns_Al Khairan First.pkl', 'model_columns_Al Merkadh.pkl', 'model_columns_Al Thanyah Fifth.pkl', 'model_columns_Al Yelayiss 2.pkl', 'model_columns_Burj Khalifa.pkl', 'model_columns_Business Bay.pkl', 'model_columns_Hadaeq Sheikh Mohammed Bin Rashid.pkl', 'model_columns_Jabal Ali First.pkl', 'model_columns_Madinat Al Mataar.pkl', 'model_columns_Madinat Dubai Almelaheyah.pkl', 'model_columns_Marsa Dubai.pkl', "model_columns_Me'Aisem First.pkl", 'model_columns_Nadd Hessa.pkl', 'model_columns_Wadi Al Safa 5.pkl', 'Price_predictor.ipynb', 'rf_model_Al Barsha South Fifth.pkl', 'rf_model_Al Merkadh.pkl', 'rf_model_Madinat Dubai

In [40]:
# ============================================================
# 5️⃣ USER INPUT TEST
# ============================================================

forecast_df = pd.read_csv("Sarima_forecast_6M.csv")
historic_df = pd.read_csv("historical_df.csv")

input_data = {
    "area_name_en": "Al_Barsha_South_Fifth",
    "procedure_area": 150,
    "has_parking": 1,
    "floor_bin": "41-50",
    "rooms_en": "4BR",
    "swimming_pool": 1,
    "balcony": 1,
    "elevator": 1
}

output = predict_with_area(input_data, forecast_df, historic_df)
print(output)


Raw Model Prediction: 8726.947816469818


ValueError: invalid literal for int() with base 10: '2020-01-01'

In [41]:
historic_df

Unnamed: 0.1,Unnamed: 0,area_name_en,month,median_price
0,817,Burj Khalifa,2020-01-01,17944.075
1,818,Madinat Al Mataar,2020-01-01,6536.165
2,819,Wadi Al Safa 5,2020-01-01,4918.840
3,820,Me'Aisem First,2020-01-01,6401.910
4,821,Marsa Dubai,2020-01-01,12994.990
...,...,...,...,...
1250,2067,Nadd Hessa,2025-11-01,9486.420
1251,2068,Al Barshaa South Third,2025-11-01,15360.500
1252,2069,Al Barsha South Fourth,2025-11-01,15463.110
1253,2070,Burj Khalifa,2025-11-01,34356.810


In [47]:
import os
import pickle
import pandas as pd
from statsmodels.nonparametric.smoothers_lowess import lowess

# ============================================================
# 1️⃣ BASE DIRECTORY – MODELS PER AREA
# ============================================================

BASE_DIR = os.path.join(
    os.path.expanduser("~"),
    "Downloads",
    "FLIPOSE_DATA",
    "Modular_Code",
    "Modular_code with 18 areas_RF"
)

print("Using Base Directory:\n", BASE_DIR)
print("\nFiles in directory:", os.listdir(BASE_DIR))


# ============================================================
# 2️⃣ LOAD TRAINING COLUMNS (per area)
# ============================================================

def load_columns(area_name_en):
    area_name_en = area_name_en.replace("_", " ").strip()
    
    for f in os.listdir(BASE_DIR):
        if f.lower() == f"model_columns_{area_name_en}.pkl".lower() or \
           f.lower() == f"model_columns_{area_name_en.replace(' ', '_')}.pkl".lower():
            with open(os.path.join(BASE_DIR, f), "rb") as file:
                return pickle.load(file)
    
    raise FileNotFoundError(
        f"❌ model_columns file not found for area '{area_name_en}'\n"
        f"Available files: {os.listdir(BASE_DIR)}"
    )


# ============================================================
# 3️⃣ LOAD MODEL (per area)
# ============================================================

def load_model(area_name_en):
    area_name_en = area_name_en.replace("_", " ").strip()
    
    for f in os.listdir(BASE_DIR):
        if f.lower() == f"rf_model_{area_name_en}.pkl".lower() or \
           f.lower() == f"rf_model_{area_name_en.replace(' ', '_')}.pkl".lower():
            with open(os.path.join(BASE_DIR, f), "rb") as file:
                return pickle.load(file)
    
    raise FileNotFoundError(
        f"❌ Model file not found for area '{area_name_en}'\n"
        f"Available files: {os.listdir(BASE_DIR)}"
    )


# ============================================================
# 4️⃣ MAIN PREDICTION FUNCTION
# ============================================================

def predict_with_area(input_data, forecast_df, historic_df):
    area = input_data["area_name_en"].replace("_", " ").strip()

    # Step 1: Load model + expected columns
    train_columns = load_columns(area)
    model = load_model(area)

    # Step 2: One-hot encode input
    temp = pd.DataFrame([input_data])
    temp["area_name_en"] = area  # ensure correct area
    temp = pd.get_dummies(temp)

    # Add missing training columns
    for col in train_columns:
        if col not in temp.columns:
            temp[col] = 0

    temp = temp[train_columns]

    # Step 3: Predict median price
    predicted_price = model.predict(temp)[0]
    print("Raw Model Prediction:", predicted_price)

    # Step 4: Clean forecast_df column names
    if "year_month" in forecast_df.columns:
        forecast_df = forecast_df.rename(columns={"year_month": "month"})

    gf = forecast_df[forecast_df["area_name_en"].replace("_", " ") == area].copy()
    gf["median_price"] = predicted_price * gf["growth_factor"]

    # Step 5: Clean historic_df column names
    if "year_month" in historic_df.columns:
        historic_df = historic_df.rename(columns={"year_month": "month"})
    if "meter_sale_price" in historic_df.columns:
        historic_df = historic_df.rename(columns={"meter_sale_price": "median_price"})

    # Step 6: Filter historic for specific area
    if historic_df.empty or "area_name_en" not in historic_df.columns:
        print(f"⚠️ No historic data found for {area}.")
        historic = pd.DataFrame()
    else:
        historic = historic_df[historic_df["area_name_en"].replace("_", " ") == area].copy()

    # Step 7: LOWESS smoothing
    if not historic.empty:
        historic = historic.sort_values("month")
        # Convert to datetime first
        historic["month"] = pd.to_datetime(historic["month"])
        x = historic["month"].astype("int64") #// 10**9  # UNIX timestamp in seconds

        smoothed = lowess(
            endog=historic["median_price"],
            exog=x,
            frac=0.03
        )

        historic["median_price"] = smoothed[:, 1]
        historic.loc[historic.index[-1], "median_price"] = predicted_price

    # Step 8: Clean & combine outputs
    for df_ in [historic, gf]:
        if "area_name_en" in df_.columns:
            df_.drop(columns=["area_name_en"], inplace=True)

    final_df = pd.concat([historic, gf], ignore_index=True)
    final_df = final_df[["month", "median_price"]]
    final_df = final_df.sort_values("month").reset_index(drop=True)

    return final_df





Using Base Directory:
 C:\Users\pooja\Downloads\FLIPOSE_DATA\Modular_Code\Modular_code with 18 areas_RF

Files in directory: ['.ipynb_checkpoints', 'historical_data.csv', 'historical_df.csv', 'model_columns_Al Barsha South Fifth.pkl', 'model_columns_Al Barsha South Fourth.pkl', 'model_columns_Al Barshaa South Third.pkl', 'model_columns_Al Hebiah Fourth.pkl', 'model_columns_Al Khairan First.pkl', 'model_columns_Al Merkadh.pkl', 'model_columns_Al Thanyah Fifth.pkl', 'model_columns_Al Yelayiss 2.pkl', 'model_columns_Burj Khalifa.pkl', 'model_columns_Business Bay.pkl', 'model_columns_Hadaeq Sheikh Mohammed Bin Rashid.pkl', 'model_columns_Jabal Ali First.pkl', 'model_columns_Madinat Al Mataar.pkl', 'model_columns_Madinat Dubai Almelaheyah.pkl', 'model_columns_Marsa Dubai.pkl', "model_columns_Me'Aisem First.pkl", 'model_columns_Nadd Hessa.pkl', 'model_columns_Wadi Al Safa 5.pkl', 'Price_predictor.ipynb', 'rf_model_Al Barsha South Fifth.pkl', 'rf_model_Al Merkadh.pkl', 'rf_model_Madinat Dubai

In [48]:
# ============================================================
# 5️⃣ USER INPUT TEST
# ============================================================

forecast_df = pd.read_csv("Sarima_forecast_6M.csv")
historic_df = pd.read_csv("historical_df.csv")

input_data = {
    "area_name_en": "Al_Barsha_South_Fifth",
    "procedure_area": 150,
    "has_parking": 1,
    "floor_bin": "41-50",
    "rooms_en": "4BR",
    "swimming_pool": 1,
    "balcony": 1,
    "elevator": 1
}

output = predict_with_area(input_data, forecast_df, historic_df)
print(output)

Raw Model Prediction: 8726.947816469816


TypeError: '<' not supported between instances of 'str' and 'Timestamp'

## 1235

In [9]:
import os
import pickle
import pandas as pd
from statsmodels.nonparametric.smoothers_lowess import lowess

# ------------------------------
# 1️⃣ BASE DIRECTORY – MODELS PER AREA
# ------------------------------
# C:\Users\anant\OneDrive\Desktop\truEstates\Modular_code with 18 areas_RF\area_models_rf
BASE_DIR = os.path.join(
    os.path.expanduser("~"),
    # "C:",
    # "Users",
    # "anant",
    "OneDrive",
    "Desktop",
    "truEstates",
    "Modular_code with 18 areas_RF"
)
    # "Modular_Code",
    # "Modular_code with 18 areas_RF"

AREA_DIR = os.path.join(BASE_DIR, "area_models_rf") 
COLUMNS_DIR = os.path.join(BASE_DIR, "training_columns")  
# ------------------------------
# 2️⃣ LOAD TRAINING COLUMNS
# ------------------------------
def load_columns(area_name_en):
    area_name_en = area_name_en.replace("_", " ").strip()
    for f in os.listdir(COLUMNS_DIR):
        if f.lower() == f"model_columns_{area_name_en}.pkl".lower() or \
           f.lower() == f"model_columns_{area_name_en.replace(' ', '_')}.pkl".lower():
            with open(os.path.join(COLUMNS_DIR, f), "rb") as file:
                return pickle.load(file)
    raise FileNotFoundError(f"❌ model_columns file not found for area '{area_name_en}'")

# ------------------------------
# 3️⃣ LOAD MODEL
# ------------------------------
def load_model(area_name_en):
    area_name_en = area_name_en.replace("_", " ").strip()
    for f in os.listdir(AREA_DIR):
        if f.lower() == f"rf_model_{area_name_en}.pkl".lower() or \
           f.lower() == f"rf_model_{area_name_en.replace(' ', '_')}.pkl".lower():
            with open(os.path.join(AREA_DIR, f), "rb") as file:
                return pickle.load(file)
    raise FileNotFoundError(f"❌ Model file not found for area '{area_name_en}'")

# ------------------------------
# 4️⃣ PREDICTION FUNCTION
# ------------------------------
def predict_with_area(input_data):
    lowess_frac=0.03
    forecast_df = pd.read_csv("Sarima_forecast_6M.csv")
    historic_df = pd.read_csv("historical_df.csv")
    area = input_data["area_name_en"].replace("_", " ").strip()

    # Step 1: Load model + expected columns
    train_columns = load_columns(area)
    model = load_model(area)

    # Step 2: One-hot encode input
    temp = pd.DataFrame([input_data])
    temp["area_name_en"] = area
    temp = pd.get_dummies(temp)
    for col in train_columns:
        if col not in temp.columns:
            temp[col] = 0
    temp = temp[train_columns]

    # Step 3: Predict median price
    predicted_price = model.predict(temp)[0]
    print("Raw Model Prediction:", predicted_price)

    # Step 4: Prepare forecast dataframe for area
    forecast_area = forecast_df[forecast_df["area_name_en"].replace("_", " ") == area].copy()
    forecast_area["median_price"] = predicted_price * forecast_area["growth_factor"]
    forecast_area = forecast_area[["month", "median_price"]]

    # Step 5: Prepare historic dataframe for area
    historic_area = historic_df[historic_df["area_name_en"].replace("_", " ") == area].copy()

    # Step 6: Apply LOWESS smoothing on historic using index as x
    if not historic_area.empty:
        historic_area = historic_area.sort_values("month").reset_index(drop=True)
        x = historic_area.index.values  # use index for LOWESS
        smoothed = lowess(
            endog=historic_area["median_price"].values,
            exog=x,
            frac=lowess_frac
        )
        historic_area["median_price"] = smoothed[:, 1]

        # Replace last historic value with first forecast median price
        if not forecast_area.empty:
            historic_area.loc[historic_area.index[-1], "median_price"] = forecast_area.iloc[0]["median_price"]

    # Step 7: Combine historic + forecast
    final_df = pd.concat([historic_area[["month", "median_price"]], forecast_area], ignore_index=True)
    final_df = final_df.reset_index(drop=True)

    return final_df


In [96]:
area_list =['Al Barsha South Fifth',
 'Al Barsha South Fourth',
 'Al Barshaa South Third',
 'Al Hebiah Fourth',
 'Al Khairan First',
 'Al Merkadh',
 'Al Thanyah Fifth',
 'Al Yelayiss 2',
 'Burj Khalifa',
 'Business Bay',
 'Hadaeq Sheikh Mohammed Bin Rashid',
 'Jabal Ali First',
 'Madinat Al Mataar',
 'Madinat Dubai Almelaheyah',
 'Marsa Dubai',
 "Me'Aisem First",
 'Nadd Hessa',
 'Wadi Al Safa 5']

In [11]:
input_data = {
    "area_name_en":  'Al Hebiah Fourth',
    "procedure_area": 70,
    "has_parking": 1,
    "floor_bin": "11-20",
    "rooms_en": "1BR",
    "swimming_pool": 1,
    "balcony": 1,
    "elevator": 1,
    "metro" : 0
}

final_df = predict_with_area(input_data)

final_df.tail(10)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Raw Model Prediction: 14245.759458443812


Unnamed: 0,month,median_price
67,2025-08-01,11774.15
68,2025-09-01,10759.825
69,2025-10-01,10778.27
70,2025-11-01,14714.12227
71,2025-12-01,14714.12227
72,2026-01-01,14962.070428
73,2026-02-01,15093.332534
74,2026-03-01,15162.82182
75,2026-04-01,15199.608986
76,2026-05-01,15219.083867
