In [34]:
# Import necessary libraries
import numpy as np
import joblib
import pandas as pd
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score

In [35]:
def load_models(model_dir='C:\\Users\\Krithika JK\\Documents\\GitHub\\FYP\\HDBApp'):
    """Load the base regressors, the stacking meta-model and the feature scaler.

    Parameters
    ----------
    model_dir : str, optional
        Directory that contains the persisted artefacts. Defaults to the
        project's original ``HDBApp`` folder, so calling ``load_models()``
        with no arguments behaves as before.

    Returns
    -------
    tuple
        ``(rf, gb, xgb, lr, dt, meta_model, scaler)``. If *any* artefact fails
        to load, the error is printed and a tuple of seven ``None`` values is
        returned instead, so callers can unpack the result unconditionally.

    Notes
    -----
    ``joblib.load`` unpickles arbitrary Python objects; only point
    ``model_dir`` at files you trust.
    """
    import os  # local import: the notebook's import cell does not provide os

    def artifact_path(filename):
        # Build the full path of one artefact inside model_dir.
        return os.path.join(model_dir, filename)

    try:
        meta_model = joblib.load(artifact_path('meta_model.pkl'))
        rf = joblib.load(artifact_path('random_forest_model.joblib'))
        gb = joblib.load(artifact_path('gradient_boosting_model.joblib'))

        # XGBoost is stored in its native JSON format, so it is restored via
        # load_model on a fresh estimator rather than through joblib.
        xgb = XGBRegressor()
        xgb.load_model(artifact_path('xgb_model.json'))

        lr = joblib.load(artifact_path('linear_regression_model.joblib'))
        dt = joblib.load(artifact_path('decision_tree_model.joblib'))

        scaler = joblib.load(artifact_path('scaler.pkl'))

        return rf, gb, xgb, lr, dt, meta_model, scaler
    except Exception as e:
        # Best-effort by design: report the failure and hand back placeholders.
        # NOTE(review): a "No module named ..." error raised while unpickling
        # presumably means the files were written by a different library
        # version than the one installed - confirm against the training env.
        print(f"Error loading models: {e}")
        return None, None, None, None, None, None, None

# Load the models
rf, gb, xgb, lr, dt, meta_model, scaler = load_models()

# Continue only when every artefact is available. The scaler is part of the
# check because scaler.transform() is called below.
loaded_artifacts = (rf, gb, xgb, lr, dt, meta_model, scaler)
if all(artifact is not None for artifact in loaded_artifacts):
    # Load the validation DataFrame
    val_df = pd.read_csv('C:\\Users\\Krithika JK\\Documents\\GitHub\\FYP\\data\\hdb_val.csv')  # Ensure you have the correct path

    # Ground-truth target used by every metric and by the plot
    y_val_true = val_df['resale_price']

    # Work on a copy so the raw validation frame stays untouched
    val_input_df = val_df.copy()

    # Category levels used to one-hot encode the validation set
    towns = ['ANG MO KIO', 'BEDOK', 'BISHAN', 'BUKIT BATOK', 'BUKIT MERAH',
             'BUKIT TIMAH', 'CENTRAL AREA', 'CHOA CHU KANG', 'CLEMENTI',
             'GEYLANG', 'HOUGANG', 'JURONG EAST', 'JURONG WEST',
             'KALLANG/WHAMPOA', 'MARINE PARADE', 'QUEENSTOWN', 'SENGKANG',
             'SERANGOON', 'TAMPINES', 'TOA PAYOH', 'WOODLANDS', 'YISHUN',
             'LIM CHU KANG', 'SEMBAWANG', 'BUKIT PANJANG', 'PASIR RIS',
             'PUNGGOL']

    flat_types = ['1 ROOM', '2 ROOM', '3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', 'MULTI GENERATION']
    flat_models = ['Improved', 'New Generation', 'Model A', 'Standard', 'Simplified',
                   'Model A-Maisonette', 'Apartment', 'Maisonette', 'Terrace',
                   '2-Room', 'Improved-Maisonette', 'Multi Generation',
                   'Premium Apartment', 'Adjoined flat', 'Premium Maisonette',
                   'Model A2', 'DBSS', 'Type S1', 'Type S2', 'Premium Apartment Loft',
                   '3Gen']

    # Vectorised one-hot encoding: one 0/1 integer column per (column, level)
    categorical_levels = {
        'town': towns,
        'flat_type': flat_types,
        'flat_model': flat_models,
    }
    for column, levels in categorical_levels.items():
        for level in levels:
            val_input_df[f'{column}_{level}'] = (val_input_df[column] == level).astype(int)

    # Feature columns, in the order fed to the scaler and the base models.
    # 'town_ANG MO KIO' and 'flat_type_1 ROOM' are intentionally absent here
    # (presumably the dropped reference categories - confirm against training).
    # The CHOA CHU KANG entry must match the encoded column name exactly;
    # a misspelt name would be silently replaced by an all-zero column by
    # reindex(fill_value=0).
    expected_columns = ['floor_area_sqm', 'nearest_supermarket_distance', 'nearest_school_distance', 'nearest_mrt_distance',
                        'nearest_hawkers_distance', 'cbd_distance', 'year_of_sale', 'calculated_remaining_lease',
                        'storey_median', 'town_BEDOK', 'town_BISHAN', 'town_BUKIT BATOK', 'town_BUKIT MERAH',
                        'town_BUKIT PANJANG', 'town_BUKIT TIMAH', 'town_CENTRAL AREA', 'town_CHOA CHU KANG',
                        'town_CLEMENTI', 'town_GEYLANG', 'town_HOUGANG', 'town_JURONG EAST', 'town_JURONG WEST',
                        'town_KALLANG/WHAMPOA', 'town_LIM CHU KANG', 'town_MARINE PARADE', 'town_PASIR RIS',
                        'town_PUNGGOL', 'town_QUEENSTOWN', 'town_SEMBAWANG', 'town_SENGKANG', 'town_SERANGOON',
                        'town_TAMPINES', 'town_TOA PAYOH', 'town_WOODLANDS', 'town_YISHUN', 'flat_model_3Gen',
                        'flat_model_Adjoined flat', 'flat_model_Apartment', 'flat_model_DBSS', 'flat_model_Improved',
                        'flat_model_Improved-Maisonette', 'flat_model_Maisonette', 'flat_model_Model A',
                        'flat_model_Model A-Maisonette', 'flat_model_Model A2', 'flat_model_Multi Generation',
                        'flat_model_New Generation', 'flat_model_Premium Apartment', 'flat_model_Premium Apartment Loft',
                        'flat_model_Premium Maisonette', 'flat_model_Simplified', 'flat_model_Standard', 'flat_model_Terrace',
                        'flat_model_Type S1', 'flat_model_Type S2', 'flat_type_2 ROOM', 'flat_type_3 ROOM', 'flat_type_4 ROOM',
                        'flat_type_5 ROOM', 'flat_type_EXECUTIVE', 'flat_type_MULTI GENERATION']

    # Keep only the model features, in order; any missing column becomes 0
    val_input_df = val_input_df.reindex(columns=expected_columns, fill_value=0)

    # Scale validation data
    val_input_scaled = scaler.transform(val_input_df)

    # Base-model predictions on the scaled validation features
    rf_val_pred = rf.predict(val_input_scaled)
    gb_val_pred = gb.predict(val_input_scaled)
    xgb_val_pred = xgb.predict(val_input_scaled)
    lr_val_pred = lr.predict(val_input_scaled)
    dt_val_pred = dt.predict(val_input_scaled)

    # Stack base predictions as meta-model inputs. The column order
    # (rf, gb, lr, xgb, dt) is kept exactly as before; it presumably has to
    # match the order used when the meta-model was fitted - do not reorder.
    X_val_meta = np.column_stack((rf_val_pred, gb_val_pred, lr_val_pred, xgb_val_pred, dt_val_pred))

    # Final predictions using meta-model
    y_val_pred = meta_model.predict(X_val_meta)

    # Calculate evaluation metrics
    rmse = root_mean_squared_error(y_val_true, y_val_pred)
    mae = mean_absolute_error(y_val_true, y_val_pred)
    r2 = r2_score(y_val_true, y_val_pred)

    # Display the evaluation metrics
    print("### Evaluation Metrics on Validation DataFrame:")
    print(f"**RMSE:** {rmse:.2f}")
    print(f"**MAE:** {mae:.2f}")
    print(f"**R²:** {r2:.2f}")

    # Plot predicted vs actual values; the dashed red diagonal marks a
    # perfect prediction (predicted == actual)
    price_range = [y_val_true.min(), y_val_true.max()]
    plt.figure(figsize=(10, 6))
    plt.scatter(y_val_true, y_val_pred, alpha=0.7)
    plt.plot(price_range, price_range, 'r--', lw=2)
    plt.title('Predicted vs Actual Resale Prices')
    plt.xlabel('Actual Resale Price')
    plt.ylabel('Predicted Resale Price')
    plt.grid(True)
    plt.show()

else:
    print("Models could not be loaded.")


Error loading models: No module named '_loss'
Models could not be loaded.
