In [1]:
import os 
# The artifact paths read later in this notebook are written relative to the
# parent directory, so step up one level when the kernel was started from a
# working directory whose path ends in 'notebooks'.
path = os.getcwd()
launched_from_notebooks = path.endswith('notebooks')

if launched_from_notebooks:
    os.chdir('../')

In [14]:
# import dependencies
import numpy as np 
import pandas as pd 
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, StackingRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

In [11]:
# Load the pre-split train/test features and targets from the
# artifacts/data_transformation directory (paths are relative to the
# current working directory).
x_train = pd.read_csv('artifacts/data_transformation/train_data/train_features.csv')
y_train = pd.read_csv('artifacts/data_transformation/train_data/train_target.csv')
x_test = pd.read_csv('artifacts/data_transformation/test_data/test_features.csv')
y_test = pd.read_csv('artifacts/data_transformation/test_data/test_target.csv')

# Fail fast with a readable message if the splits are misaligned, instead of
# surfacing a less obvious error from inside an estimator.
assert len(x_train) == len(y_train), "train features and target have different row counts"
assert len(x_test) == len(y_test), "test features and target have different row counts"
assert y_train.shape[1] == 1, "expected a single-column training target"

# Linear models, all with default hyperparameters.
glm_models = {
    'LinearRegression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet()
}

# One record (dict of metrics) per model; turned into a DataFrame at the end.
results = []

# Train each model on the training split and score it on the test split.
for name, model in glm_models.items():
    # Flatten the single-column target frame to 1-D, as the other model cells
    # in this notebook do, so every estimator is fitted on the same target
    # shape and returns 1-D predictions.
    model.fit(x_train, y_train.values.ravel())
    y_pred = model.predict(x_test)

    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mse)  # RMSE derived from the MSE already computed

    results.append({
        'Model': name,
        'R2 Score': r2,
        'MSE': mse,
        'MAE': mae,
        'RMSE': rmse
    })

# Convert to DataFrame
results_df = pd.DataFrame(results)
print(results_df)


              Model  R2 Score            MSE         MAE        RMSE
0  LinearRegression  0.974970    4355.437771   46.638889   65.995741
1             Ridge  0.974970    4355.456117   46.634140   65.995880
2             Lasso  0.974616    4417.084003   47.072798   66.461147
3        ElasticNet  0.296923  122341.566084  298.697222  349.773593


In [13]:
 # Define GBM models
# Fixed seed shared by every estimator below so that Restart & Run All
# reproduces the same scores; learners such as RandomForest and AdaBoost
# resample the training data randomly when left unseeded.
RANDOM_STATE = 42

# Boosting and bagging tree ensembles, each limited to 50 trees/iterations.
gbm_models = {
    'XGBoost': XGBRegressor(verbosity=0, n_estimators=50, n_jobs=-1, random_state=RANDOM_STATE),
    'LightGBM': LGBMRegressor(n_estimators=50, n_jobs=-1, random_state=RANDOM_STATE),
    'CatBoost': CatBoostRegressor(verbose=0, iterations=50, thread_count=-1, random_seed=RANDOM_STATE),
    'GradientBoosting': GradientBoostingRegressor(n_estimators=50, random_state=RANDOM_STATE),
    'AdaBoostRegressor': AdaBoostRegressor(n_estimators=50, random_state=RANDOM_STATE),
    'RandomForestRegressor': RandomForestRegressor(n_estimators=50, n_jobs=-1, random_state=RANDOM_STATE)
}

# One record (dict of metrics) per model; turned into a DataFrame at the end.
results = []

# Train each ensemble on the training split and score it on the test split.
for name, model in gbm_models.items():
    # The target frame is flattened to the 1-D array these estimators are
    # fitted on.
    model.fit(x_train, y_train.values.ravel())
    y_pred = model.predict(x_test)

    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mse)  # RMSE derived from the MSE already computed

    results.append({
        'Model': name,
        'R2 Score': r2,
        'MSE': mse,
        'MAE': mae,
        'RMSE': rmse
    })

# Save results in a DataFrame
gbm_results_df = pd.DataFrame(results)
print(gbm_results_df)

                   Model  R2 Score          MSE        MAE       RMSE
0                XGBoost  0.983927  2796.854004  42.216209  52.885291
1               LightGBM  0.982120  3111.304095  44.358122  55.779065
2               CatBoost  0.985175  2579.604512  40.551416  50.789807
3       GradientBoosting  0.967599  5638.140483  59.318145  75.087552
4      AdaBoostRegressor  0.944904  9587.146022  78.846816  97.913973
5  RandomForestRegressor  0.983104  2940.052561  43.281529  54.222252


 * At the individual-model level, CatBoost performed best (R2 0.9852, RMSE 50.79).

In [15]:
# Level-0 learners: the same CatBoost configuration used in the single-model
# comparison, plus an ordinary least-squares model.
base_models = [
    ('catboost', CatBoostRegressor(verbose=0, iterations=50, thread_count=-1)),
    ('linear', LinearRegression()),
]

# A linear meta-learner combines the base learners' predictions.
stacked_model = StackingRegressor(estimators=base_models, final_estimator=LinearRegression())

# Train on the flattened 1-D target, then predict the test split.
stacked_model.fit(x_train, y_train.values.ravel())
y_pred = stacked_model.predict(x_test)

# Same four metrics as reported for the individual models.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Stacked Model - R2: {r2:.4f}, MSE: {mse:.4f}, MAE: {mae:.4f}, RMSE: {rmse:.4f}")

Stacked Model - R2: 0.9852, MSE: 2578.8230, MAE: 40.5465, RMSE: 50.7821


* The stacked model improved on CatBoost alone by only a negligible amount (RMSE 50.78 vs. 50.79).