In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!ls -l "/content/drive/MyDrive/ML2 regression"

total 95
drwx------ 2 root root  4096 Dec 30 19:25 artifacts
drwx------ 2 root root  4096 Dec 30 19:25 data
-rw------- 1 root root 84972 Dec 28 18:20 just.docx
drwx------ 2 root root  4096 Dec 30 19:25 notebooks


In [4]:
import os
import numpy as np
import pandas as pd
import pickle

In [5]:
BASE_PATH = "/content/drive/MyDrive/ML2 regression"

DATA_PATH = os.path.join(BASE_PATH, "data", "processed")
PREP_PATH = os.path.join(BASE_PATH, "artifacts", "preprocessing")
MODEL_PATH = os.path.join(BASE_PATH, "artifacts", "models")

print("Base path:", BASE_PATH)
print("Data path:", DATA_PATH)
print("Preprocessing path:", PREP_PATH)
print("Model path:", MODEL_PATH)

Base path: /content/drive/MyDrive/ML2 regression
Data path: /content/drive/MyDrive/ML2 regression/data/processed
Preprocessing path: /content/drive/MyDrive/ML2 regression/artifacts/preprocessing
Model path: /content/drive/MyDrive/ML2 regression/artifacts/models


In [6]:
X_test = pd.read_csv(os.path.join(DATA_PATH, "X_test_raw.csv"))
y_test = pd.read_csv(os.path.join(DATA_PATH, "y_test.csv"))

print("Test data loaded:")
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

Test data loaded:
X_test shape: (37583, 18)
y_test shape: (37583, 1)


In [9]:
!pip install category_encoders

Collecting category_encoders
  Downloading category_encoders-2.9.0-py3-none-any.whl.metadata (7.9 kB)
Downloading category_encoders-2.9.0-py3-none-any.whl (85 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.9/85.9 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: category_encoders
Successfully installed category_encoders-2.9.0


In [10]:
with open(os.path.join(PREP_PATH, "preprocessor.pkl"), "rb") as f:
    preprocessor = pickle.load(f)

print("Preprocessor loaded successfully.")

Preprocessor loaded successfully.


In [11]:
X_test_transformed = preprocessor.transform(X_test)
print("Test data transformed. Shape:", X_test_transformed.shape)

Test data transformed. Shape: (37583, 89)


In [13]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [14]:
def load_model(name):
    with open(os.path.join(MODEL_PATH, name), "rb") as f:
        return pickle.load(f)

dt_model        = load_model("decision_tree_best.pkl")
rf_model        = load_model("random_forest_best.pkl")
bag_default     = load_model("bagging_model_best.pkl")
bag_tuned       = load_model("bagging_model_tuned.pkl")
xgb_model       = load_model("xgboost_best.pkl")
cat_model       = load_model("catboost_strong.pkl")
stack_model     = load_model("stacking_model.pkl")

print("All models loaded successfully.")

All models loaded successfully.


In [15]:
stack_test = np.column_stack([
    dt_model.predict(X_test_transformed),
    rf_model.predict(X_test_transformed),
    bag_default.predict(X_test_transformed),
    bag_tuned.predict(X_test_transformed),
    xgb_model.predict(X_test_transformed),
    cat_model.predict(X_test_transformed)
])

print("Stacking test features shape:", stack_test.shape)

Stacking test features shape: (37583, 6)


In [16]:
y_test_series = np.ravel(y_test)
print("y_test shape after flatten:", y_test_series.shape)

y_test shape after flatten: (37583,)


In [17]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [19]:
def evaluate_model(name, model, X, y):
    preds = model.predict(X)
    rmse = np.sqrt(mean_squared_error(y, preds))
    mae = mean_absolute_error(y, preds)
    mape = np.mean(np.abs((y - preds) / (y + 1e-8))) * 100
    r2 = r2_score(y, preds)
    return {
        "Model": name,
        "RMSE": rmse,
        "MAE": mae,
        "MAPE": mape,
        "R2": r2
    }

In [20]:
results_test = []

# Bagging (Default) on full transformed test data
results_test.append(
    evaluate_model("Bagging (Default)", bag_default, X_test_transformed, y_test_series)
)

# Stacking Meta-Model on stacking features
results_test.append(
    evaluate_model("Stacking Meta-Model", stack_model, stack_test, y_test_series)
)

df_test_results = pd.DataFrame(results_test)
df_test_results_sorted = df_test_results.sort_values(by="RMSE")

df_test_results_sorted

Unnamed: 0,Model,RMSE,MAE,MAPE,R2
0,Bagging (Default),0.177557,0.064748,1.13974,0.873412
1,Stacking Meta-Model,0.180251,0.070507,1.234947,0.869542


In [22]:
TEST_PATH = os.path.join(BASE_PATH, "artifacts", "test")
os.makedirs(TEST_PATH, exist_ok=True)
save_path = os.path.join(TEST_PATH, "test_results.csv")
df_test_results_sorted.to_csv(save_path, index=False)

print("Test results saved to:", save_path)

Test results saved to: /content/drive/MyDrive/ML2 regression/artifacts/test/test_results.csv


##Final Test Evaluation

In this notebook, we evaluated our two strongest models from the validation stage **Bagging (Default)** and the **Stacking Meta‑Model**  on the final test dataset.  

### (Simplified)
- **Bagging (Default)**  
  - RMSE: 0.1776  
  - MAE: 0.0647  
  - MAPE: 1.14%  
  - R²: 0.8734  

- **Stacking Meta‑Model**  
  - RMSE: 0.1803  
  - MAE: 0.0705  
  - MAPE: 1.23%  
  - R²: 0.8695  

Bagging (Default) achieved slightly better accuracy across all metrics.  
Its lower RMSE and MAE indicate that its predictions are closer to the true values, and its higher R² shows that it explains more variance in the target.  
The Stacking Meta‑Model also performed very well, confirming that our model selection process was stable and consistent.

In [21]:
# Predict in log-space
preds_log = bag_default.predict(X_test_transformed)

# Convert back to original scale
y_pred_original = np.exp(preds_log)
y_test_original = np.exp(y_test_series)

rmse_original = np.sqrt(mean_squared_error(y_test_original, y_pred_original))
mae_original = mean_absolute_error(y_test_original, y_pred_original)
mape_original = np.mean(np.abs((y_test_original - y_pred_original) / (y_test_original + 1e-8))) * 100
r2_original = r2_score(y_test_original, y_pred_original)

print("=== Metrics in ORIGINAL scale ===")
print(f"RMSE: {rmse_original:.4f}")
print(f"MAE: {mae_original:.4f}")
print(f"MAPE: {mape_original:.4f}%")
print(f"R²: {r2_original:.4f}")
comparison_df = pd.DataFrame({
    "Actual (original)": y_test_original[:10],
    "Predicted (original)": y_pred_original[:10]
})

print("\n=== Sample Predictions in Original Scale ===")
print(comparison_df)

=== Metrics in ORIGINAL scale ===
RMSE: 208.7539
MAE: 78.9000
MAPE: 14.2101%
R²: 0.8688

=== Sample Predictions in Original Scale ===
   Actual (original)  Predicted (original)
0              971.0           1018.826997
1              741.0            724.667637
2             1601.0           1736.645376
3              481.0            478.520926
4             2806.0           2672.000281
5             1416.0           1511.269954
6             1481.0           1504.213504
7              701.0            758.689781
8             1366.0           1369.459158
9             1850.0           1852.572760
