In [16]:
DATE_FORMAT

'%d-%m-%Y'

In [17]:
import pandas as pd
import pickle
import os
import numpy as np
import configparser
from datetime import datetime, timedelta
from statsmodels.nonparametric.smoothers_lowess import lowess

# Load configuration with interpolation disabled (prevents issues with % in DATE_FORMAT)
CONFIG_FILE = 'config.ini'
cfg = configparser.ConfigParser(interpolation=None)
cfg.read(CONFIG_FILE)

# --- CONFIGURATION CONSTANTS ---
OHE_FILE = cfg.get('GENERAL', 'OHE_FILE', fallback='onehot_encoder.pkl')
COLUMNS_FILE = cfg.get('GENERAL', 'COLUMNS_FILE', fallback='train_columns.pkl')
TRAINING_DATA_FILE = cfg.get('GENERAL', 'TRAINING_DATA_FILE', fallback='df_trained_dataset_6000.csv')
FORECAST_DATA_FILE = cfg.get('GENERAL', 'FORECAST_DATA_FILE', fallback='Sarima_forecast_6M.csv')

try:
    LOESS_FRAC = cfg.getfloat('GENERAL', 'LOESS_FRAC', fallback=0.1)
except Exception:
    LOESS_FRAC = 0.1
try:
    LOESS_IT = cfg.getint('GENERAL', 'LOESS_IT', fallback=3)
except Exception:
    LOESS_IT = 3

DATE_FORMAT = cfg.get('GENERAL', 'DATE_FORMAT', fallback='%d-%m-%Y')  # Standard DD-MM-YYYY format

class PropertyPricePredictor:
    """
    A modular class to load models and data, prepare inputs, predict property
    prices, and perform historical trend and future forecast analysis.
    """

    # AREA_FILE_MAP will be populated from `config.ini` (section: AREA_FILE_PATHS)
    # Keys in the config are simplified area identifiers (e.g. al_barsha_south_fifth)
    # Values are full paths to the .pkl model files.
    AREA_FILE_MAP = {}

    def __init__(self):
        # Read area file paths from config and then load models
        self.AREA_FILE_MAP = self._read_area_file_paths()
        self.area_models = self._load_area_models()
        self.ohe, self.train_columns = self._load_encoder_and_columns()
        self.train_data = self._load_training_data()
        self.growth_pivot = self._load_forecasting_data()

        if not self.area_models:
            print("‚ùå WARNING: No area models loaded. Prediction will fail.")
        if self.ohe is None or self.train_columns is None:
            print("‚ùå WARNING: Encoder or training columns failed to load.")

    def _load_area_models(self) -> dict:
        loaded_models = {}
        missing_models = []
        # AREA_FILE_MAP contains {config_key: full_path}; load and map to display name
        for cfg_key, model_path in self.AREA_FILE_MAP.items():
            # Derive the displayable area name from the model filename (keeps capitalization and apostrophes)
            try:
                basename = os.path.basename(model_path)
                area_name = basename.replace('dt_model_', '').replace('.pkl', '').replace('_', ' ').strip()
                # Validate file existence
                if not os.path.exists(model_path):
                    raise FileNotFoundError(f"Model file not found: {model_path}")
                with open(model_path, 'rb') as f:
                    loaded_models[area_name] = pickle.load(f)
            except FileNotFoundError:
                missing_models.append(model_path)
            except Exception as e:
                print(f"‚ùå Error loading {model_path}: {e}")
        if missing_models:
            print(f"‚ö†Ô∏è Missing models: {len(missing_models)} files could not be found. (This is expected in a sandbox environment without the actual files.)")
        return loaded_models

    def _read_area_file_paths(self) -> dict:
        """Read AREA_FILE_PATHS section from CONFIG_FILE and return a dict of key->path."""
        try:
            read_files = cfg.read(CONFIG_FILE)
            if not read_files:
                print(f"‚ö†Ô∏è Could not read config file: {CONFIG_FILE}. Using empty area map.")
                return {}
            if 'AREA_FILE_PATHS' not in cfg:
                print(f"‚ö†Ô∏è 'AREA_FILE_PATHS' section not found in {CONFIG_FILE}. Using empty area map.")
                return {}
            # configparser lower-cases option names by default; values remain as provided
            return dict(cfg['AREA_FILE_PATHS'])
        except Exception as e:
            print(f"‚ùå Error reading config file {CONFIG_FILE}: {e}")
            return {}

    def _load_encoder_and_columns(self):
        ohe = None
        train_columns = None
        try:
            # Mock loading since actual files are not present in this context
            if not os.path.exists(OHE_FILE):
                raise FileNotFoundError(f"OHE file not found: {OHE_FILE}")
            with open(OHE_FILE, 'rb') as f:
                ohe = pickle.load(f)
        except Exception as e:
            print(f"‚ùå Error loading One-Hot Encoder: {e}")
        try:
            # Mock loading since actual files are not present in this context
            if not os.path.exists(COLUMNS_FILE):
                raise FileNotFoundError(f"Columns file not found: {COLUMNS_FILE}")
            with open(COLUMNS_FILE, 'rb') as f:
                train_columns = pickle.load(f)
        except Exception as e:
            print(f"‚ùå Error loading Training Columns: {e}")
        return ohe, train_columns

    def _load_training_data(self) -> pd.DataFrame:
        try:
            # Mock loading since actual files are not present in this context
            if not os.path.exists(TRAINING_DATA_FILE):
                raise FileNotFoundError(f"Training data file not found: {TRAINING_DATA_FILE}")

            train_data = pd.read_csv(TRAINING_DATA_FILE)
            train_data['instance_date'] = pd.to_datetime(train_data['instance_date'])
            return train_data
        except Exception as e:
            print(f"‚ùå Could not load training data for trend analysis: {e}")
            return None

    def _load_forecasting_data(self) -> pd.DataFrame:
        try:
            # Mock loading since actual files are not present in this context
            if not os.path.exists(FORECAST_DATA_FILE):
                raise FileNotFoundError(f"Forecast data file not found: {FORECAST_DATA_FILE}")

            growth_df = pd.read_csv(FORECAST_DATA_FILE)
            return growth_df
        except Exception as e:
            print(f"‚ùå Error loading forecasting data: {e}")
            return None

    def prepare_input_data(self, area, rooms, floor, pool, balcony_val, elevator_val, metro_val, parking, area_size):
        input_data = pd.DataFrame({
            'rooms_en': [rooms], 'floor_bin': [floor], 'swimming_pool': [pool],
            'balcony': [balcony_val], 'elevator': [elevator_val], 'metro': [metro_val],
            'has_parking': [parking], 'area_name_en': [area], 'procedure_area': [area_size]
        })
        area_name = input_data['area_name_en'].iloc[0]
        input_no_area = input_data.drop(columns=['area_name_en'])
        cat_cols = ['rooms_en', 'floor_bin']
        if self.ohe is None or self.train_columns is None:
            return None, None, None
        try:
            X_cat = self.ohe.transform(input_no_area[cat_cols])
            feature_names = self.ohe.get_feature_names_out(cat_cols)
            X_cat_df = pd.DataFrame(X_cat.toarray() if hasattr(X_cat, 'toarray') else X_cat, columns=feature_names)
            X_numerical = input_no_area.drop(columns=cat_cols)
            X_processed = pd.concat([X_numerical.reset_index(drop=True), X_cat_df.reset_index(drop=True)], axis=1)
        except Exception as e:
            print(f"‚ùå Error in encoding input: {e}")
            return None, None, None
        final_X = pd.DataFrame(0, index=X_processed.index, columns=self.train_columns)
        for col in X_processed.columns:
             if col in final_X.columns:
                 final_X[col] = X_processed[col]
        return final_X, area_name, input_data

    # Modified filter to ONLY filter by area name (Tier 3)
    def filter_training_data_by_area_only(self, train_data, area_name):
        if train_data is None:
            return pd.DataFrame()

        # Filter only by area name, ignoring all other property features
        filtered_data = train_data[train_data['area_name_en'] == area_name].copy()

        return filtered_data

    # Combined trend calculation, now specialized for Area Trend
    def calculate_area_trend(self, filtered_data):
        """Calculates LOESS trend for the entire area, returning a formatted DataFrame or None."""
        TREND_TYPE = 'Historical Trend (Entire Area)'

        if filtered_data is None or len(filtered_data) < 2:
            return pd.DataFrame({'Month': [], 'Median Price': [], 'Type': []})

        filtered = filtered_data.copy()
        filtered['instance_date'] = pd.to_datetime(filtered['instance_date'])
        filtered['year_month'] = filtered['instance_date'].dt.to_period('M')
        monthly_data = filtered.groupby('year_month')['meter_sale_price'].agg(['median', 'count']).reset_index()
        monthly_data = monthly_data.rename(columns={'median': 'meter_sale_price', 'count': 'data_points'})
        monthly_data['timestamp'] = monthly_data['year_month'].dt.to_timestamp()
        monthly_data = monthly_data.sort_values('timestamp').reset_index(drop=True)

        if len(monthly_data) < 2:
            return pd.DataFrame({'Month': [], 'Median Price': [], 'Type': []})

        try:
            # Ensure numeric arrays
            monthly_data['num_index'] = np.arange(len(monthly_data))
            y_values = monthly_data['meter_sale_price'].astype(float).values
            x_values = monthly_data['num_index'].astype(float).values

            # Coerce LOESS parameters to numeric with safe fallbacks
            try:
                frac = float(LOESS_FRAC)
            except Exception:
                frac = 0.1
            try:
                iters = int(LOESS_IT)
            except Exception:
                iters = 3

            loess_smoothed = lowess(y_values, x_values, frac=frac, it=iters)

            # Validate LOESS output
            if loess_smoothed is None or loess_smoothed.size == 0:
                raise ValueError('LOESS returned no data')
            # loess_smoothed[:,0] are x-values (floats). Map to nearest integer positions.
            trend_pos = np.rint(loess_smoothed[:, 0]).astype(int)
            # Clamp positions to valid row indices
            trend_pos = np.clip(trend_pos, 0, len(monthly_data) - 1)
            # Preserve order but remove duplicates
            _, unique_idx = np.unique(trend_pos, return_index=True)
            trend_pos = trend_pos[np.sort(unique_idx)]

            # Build trend DataFrame. Use errors='coerce' when parsing dates to avoid crashes.
            months = monthly_data['timestamp'].iloc[trend_pos]
            trend_df = pd.DataFrame({
                'Month': months.dt.strftime(DATE_FORMAT).values,
                'Median Price': loess_smoothed[:, 1],
                'Type': TREND_TYPE
            })
            # Add temporary key for sorting before dropping it
            trend_df['Sort_Key'] = pd.to_datetime(trend_df['Month'], format=DATE_FORMAT, errors='coerce')
            return trend_df.sort_values('Sort_Key').drop(columns=['Sort_Key'])
        except Exception as e:
            # Helpful debug output so the root cause is visible when running the notebook
            print(f"‚ùå Error during LOESS calculation ({TREND_TYPE}): {e}")
            try:
                # If available, show small diagnostics
                print(f"  monthly_data rows={len(monthly_data)}, LOESS_FRAC={LOESS_FRAC}, LOESS_IT={LOESS_IT}")
            except Exception:
                pass
            return pd.DataFrame({'Month': [], 'Median Price': [], 'Type': []})

    def prepare_forecast_data(self, area_name):
        if self.growth_pivot is None:
            return None
        area_growth = self.growth_pivot[self.growth_pivot['area_name_en'] == area_name]
        if area_growth.empty:
            return None
        periods = area_growth['month'].unique()
        forecast_data = {}
        for period in periods:
            period_data = area_growth[area_growth['month'] == period].iloc[0]
            forecast_data[period] = {
                'main': period_data['growth_factor'],
                'upper': period_data['growth_factor_upper'],
                'lower': period_data['growth_factor_lower']
            }
        return forecast_data
        

In [19]:
# Assuming the entire PropertyPricePredictor class definition from your prompt is already defined above this block.

if __name__ == "__main__":
    
    # 1. Instantiate the predictor engine
    engine = PropertyPricePredictor()

    print("\n" + "="*70)
    print("üöÄ Running Analysis for Sample Input: Al Barsha South Fourth (2 B/R, 60 sqMt)")
    print("="*70)
    
    # --- Define Input Features ---
    selected_area = 'Al Hebaih Fourth'
    rooms_en = '2 B/R'             
    floor_bin = '11-20'            
    swimming_pool = 1              # 1 for Yes, 0 for No
    balcony = 1
    elevator = 1
    metro = 0
    has_parking = 1
    procedure_area = 60 # sqMt    

    # 2. Call the method and receive the combined DataFrame
    results_df = engine.predict_and_analyze(
        selected_area, rooms_en, floor_bin, swimming_pool, balcony, 
        elevator, metro, has_parking, procedure_area
    )


print("\n--- Execution complete. ---")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations



üöÄ Running Analysis for Sample Input: Al Barsha South Fourth (2 B/R, 60 sqMt)


AttributeError: 'PropertyPricePredictor' object has no attribute 'predict_and_analyze'

In [13]:
results_df


Unnamed: 0,Month,Median Price,Type
0,01-08-2025,19484.89,Prediction Point
1,2025-09-01,19708.184572,Future Forecast
2,2025-10-01,19930.603201,Future Forecast
3,2025-11-01,20152.900051,Future Forecast
4,2025-12-01,20375.584729,Future Forecast
5,2026-01-01,20598.89932,Future Forecast
6,2026-02-01,20822.861869,Future Forecast
