In [1]:
# Mount Google Drive at /content/drive so the project files stored there can be read (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# List the Drive root to confirm the mount worked and to locate the project folder.
!ls -l "/content/drive/MyDrive/"

total 133829
drwx------ 2 root root      4096 Jun  2  2025  Classroom
-rw------- 1 root root       183 Nov 13 21:44 'CO7 TECHNOLOGIES: INFORMATION-SYSTEMS SELECTION.gdoc'
drwx------ 2 root root      4096 Feb 24  2025 'Colab Notebooks'
drwx------ 2 root root      4096 Nov  1 15:29 'Maastricht university'
drwx------ 2 root root      4096 Dec 30 19:25 'ML2 regression'
drwx------ 2 root root      4096 Nov  1 15:35  Private
drwx------ 2 root root      4096 Nov  1 15:31 'University of Warsaw'
-rw------- 1 root root 137010832 Dec 26 19:01  USA_housing_project.zip
drwx------ 2 root root      4096 Dec 21 22:29  usa_rent_ml2_regression


In [3]:
# List the project folder to check its top-level layout before building paths.
!ls -l "/content/drive/MyDrive/ML2 regression"

total 95
drwx------ 2 root root  4096 Dec 30 19:25 artifacts
drwx------ 2 root root  4096 Dec 30 19:25 data
-rw------- 1 root root 84972 Dec 28 18:20 just.docx
drwx------ 2 root root  4096 Dec 30 19:25 notebooks


In [4]:
import os
import numpy as np
import pandas as pd
import pickle

In [5]:
# Project root on the mounted Drive; every other location is derived from it.
BASE_PATH = "/content/drive/MyDrive/ML2 regression"

# Processed datasets, the fitted preprocessing pipeline and the trained models.
DATA_PATH = os.path.join(BASE_PATH, "data", "processed")
PREP_PATH = os.path.join(BASE_PATH, "artifacts", "preprocessing")
MODEL_PATH = os.path.join(BASE_PATH, "artifacts", "models")

# Echo the resolved locations so a wrong mount point is spotted immediately.
for _label, _location in (
    ("Base path:", BASE_PATH),
    ("Data path:", DATA_PATH),
    ("Preprocessing path:", PREP_PATH),
    ("Model path:", MODEL_PATH),
):
    print(_label, _location)

Base path: /content/drive/MyDrive/ML2 regression
Data path: /content/drive/MyDrive/ML2 regression/data/processed
Preprocessing path: /content/drive/MyDrive/ML2 regression/artifacts/preprocessing
Model path: /content/drive/MyDrive/ML2 regression/artifacts/models


In [6]:
# Resolve the validation CSV locations first, then read features and target.
x_valid_file = os.path.join(DATA_PATH, "X_valid_raw.csv")
y_valid_file = os.path.join(DATA_PATH, "y_valid.csv")

X_valid = pd.read_csv(x_valid_file)
y_valid = pd.read_csv(y_valid_file)

# Report both shapes so a row-count mismatch between X and y is visible here.
print("Validation data loaded:")
print("X_valid shape:", X_valid.shape)
print("y_valid shape:", y_valid.shape)

Validation data loaded:
X_valid shape: (37582, 18)
y_valid shape: (37582, 1)


In [8]:
!pip install category_encoders

Collecting category_encoders
  Downloading category_encoders-2.9.0-py3-none-any.whl.metadata (7.9 kB)
Downloading category_encoders-2.9.0-py3-none-any.whl (85 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.9/85.9 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: category_encoders
Successfully installed category_encoders-2.9.0


In [9]:
# Restore the preprocessing pipeline that was pickled to PREP_PATH.
# NOTE: pickle.load executes code embedded in the file; only load artifacts you produced yourself.
preprocessor_file = os.path.join(PREP_PATH, "preprocessor.pkl")
with open(preprocessor_file, "rb") as handle:
    preprocessor = pickle.load(handle)

print("Preprocessor loaded successfully.")

Preprocessor loaded successfully.


In [10]:
# Apply the already-fitted pipeline (transform only, no refit) to the raw validation features.
X_valid_transformed = preprocessor.transform(X_valid)

transformed_shape = X_valid_transformed.shape
print("Validation data transformed. Shape:", transformed_shape)

Validation data transformed. Shape: (37582, 89)


In [12]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [13]:
def load_model(name):
    """Unpickle and return the object stored in file ``name`` under ``MODEL_PATH``.

    Parameters
    ----------
    name : str
        File name (including extension) inside ``MODEL_PATH``.

    Returns
    -------
    object
        Whatever object the pickle file contains.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code from the file, so this must
    only be used on artifacts from a trusted source.
    """
    model_file = os.path.join(MODEL_PATH, name)
    with open(model_file, "rb") as handle:
        model = pickle.load(handle)
    return model

# Tree-based single model and forest.
dt_model = load_model("decision_tree_best.pkl")
rf_model = load_model("random_forest_best.pkl")

# Bagging ensembles (default and tuned variants).
bag_default = load_model("bagging_model_best.pkl")
bag_tuned = load_model("bagging_model_tuned.pkl")

# Gradient-boosting models.
xgb_model = load_model("xgboost_best.pkl")
cat_model = load_model("catboost_strong.pkl")

# Meta-model that is fed the predictions of the models above.
stack_model = load_model("stacking_model.pkl")

print("All models loaded successfully.")

All models loaded successfully.


In [14]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pandas as pd

In [17]:

# Flatten the loaded target into a 1-D NumPy array (despite the name, not a pandas Series).
y_valid_series = np.asarray(y_valid).ravel()
def _column_to_1d(values):
    """Return ``values`` as a float array, flattening an ``(n, 1)`` column to ``(n,)``."""
    arr = np.asarray(values, dtype=float)
    if arr.ndim == 2 and arr.shape[1] == 1:
        return arr.ravel()
    return arr


def evaluate_model(name, model, X, y):
    """Score a fitted regressor on one dataset and return its metrics.

    Parameters
    ----------
    name : str
        Label stored under the ``"Model"`` key of the result.
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Features, passed unchanged to ``model.predict``.
    y : array-like
        True target values. A single-column 2-D input (for example a
        one-column DataFrame) is flattened to 1-D before scoring.

    Returns
    -------
    dict
        Keys ``"Model"``, ``"RMSE"``, ``"MAE"``, ``"MAPE"`` (in percent)
        and ``"R2"``.
    """
    # Flatten column-shaped inputs: an (n, 1) target against (n,) predictions
    # would otherwise broadcast to an (n, n) matrix in the MAPE expression
    # and silently produce a meaningless value.
    y_true = _column_to_1d(y)
    preds = _column_to_1d(model.predict(X))

    rmse = np.sqrt(mean_squared_error(y_true, preds))
    mae = mean_absolute_error(y_true, preds)
    # |y| + eps is identical to the former (y + eps) for positive targets but
    # can never reach zero, so negative or zero targets no longer risk a
    # division by zero.
    mape = np.mean(np.abs(y_true - preds) / (np.abs(y_true) + 1e-8)) * 100
    r2 = r2_score(y_true, preds)

    return {
        "Model": name,
        "RMSE": rmse,
        "MAE": mae,
        "MAPE": mape,
        "R2": r2,
    }

# Base learners with their leaderboard labels. The list order also fixes the
# column order of the stacking features below.
# NOTE(review): presumably this order must match the one used when the
# meta-model was fitted — confirm against the training notebook.
base_models = [
    ("Decision Tree", dt_model),
    ("Random Forest", rf_model),
    ("Bagging (Default)", bag_default),
    ("Bagging (Tuned)", bag_tuned),
    ("XGBoost", xgb_model),
    ("CatBoost", cat_model),
]

# One column of predictions per base learner -> input matrix for the meta-model.
stack_valid = np.column_stack(
    [learner.predict(X_valid_transformed) for _, learner in base_models]
)

print("Stacking validation features shape:", stack_valid.shape)

# Score every base learner on the transformed validation features.
results = [
    evaluate_model(label, learner, X_valid_transformed, y_valid_series)
    for label, learner in base_models
]

# IMPORTANT: Stacking uses stack_valid (6 features), not X_valid_transformed (89 features)
results.append(evaluate_model("Stacking Meta-Model", stack_model, stack_valid, y_valid_series))

# Leaderboard, best (lowest RMSE) first.
df_results = pd.DataFrame(results)
df_results_sorted = df_results.sort_values(by="RMSE")

df_results_sorted

Stacking validation features shape: (37582, 6)


Unnamed: 0,Model,RMSE,MAE,MAPE,R2
2,Bagging (Default),0.189121,0.065142,1.226465,0.860745
6,Stacking Meta-Model,0.192928,0.07088,1.322564,0.855082
5,CatBoost,0.209221,0.10578,1.939964,0.829571
4,XGBoost,0.217981,0.11312,2.11712,0.815
3,Bagging (Tuned),0.264111,0.148449,2.821532,0.728415
1,Random Forest,0.296109,0.167945,3.346845,0.658621
0,Decision Tree,0.353721,0.211871,3.954614,0.512859


## Model Validation

In this notebook, we evaluate all trained models on the **validation set** to measure their real generalization performance.  
The goal is to compare models fairly, select the most reliable one, and prepare for final testing.

### What We Did
- Loaded the validation dataset (`X_valid_raw`, `y_valid`) from the processed data folder.
- Loaded the preprocessing pipeline and applied the exact same transformations used during training.
- Loaded all trained models from the artifacts directory.
- Generated predictions for each model on the transformed validation data.
- Built stacking meta-features and evaluated the stacking meta-model correctly.
- Calculated core performance metrics (RMSE, MAE, MAPE, R²) for every model.
- Created a leaderboard to identify the best-performing model.

### Key Results
- **Bagging (Default)** achieved the best validation performance overall (RMSE 0.189, R² 0.861).
- **Stacking Meta-Model** performed very close to Bagging (RMSE 0.193, R² 0.855) and showed strong stability.
- **Decision Tree** was the weakest model (RMSE 0.354, R² 0.513), confirming it is not suitable for final deployment.

These results help us identify which models are reliable enough to move forward to the final test evaluation.

In [18]:
# Destination for the leaderboard: <BASE_PATH>/artifacts/validation/validation_results.csv
VALID_PATH = os.path.join(BASE_PATH, "artifacts", "validation")
save_path = os.path.join(VALID_PATH, "validation_results.csv")

# Create the folder on first run, then write the sorted table without its index column.
os.makedirs(VALID_PATH, exist_ok=True)
df_results_sorted.to_csv(save_path, index=False)

print("Validation results saved to:", save_path)


Validation results saved to: /content/drive/MyDrive/ML2 regression/artifacts/validation/validation_results.csv
