In [1]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

In [2]:
# Location of the pre-split regression arrays
data_path = '/kaggle/input/regression'

# Feature matrices
X_train = np.load(f'{data_path}/X_train.npy')
X_test = np.load(f'{data_path}/X_test.npy')

# Targets, flattened to 1-D as soon as they are read
y_train = np.load(f'{data_path}/y_train.npy').ravel()
y_test = np.load(f'{data_path}/y_test.npy').ravel()

# Standardise the features: the scaler is fitted on the training set only,
# and the fitted scaler is then applied to both the training and test sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [3]:
from sklearn.svm import SVR

# Base estimator; the grid below overrides kernel, C and epsilon.
svr = SVR()

# Hyperparameter tuning for SVR
param_grid_svr = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10],
    'epsilon': [0.01, 0.1, 0.2]
}

# 5-fold cross-validated grid search on the scaled training data, ranked by R².
grid_search_svr = GridSearchCV(estimator=svr, param_grid=param_grid_svr, cv=5, n_jobs=-1, scoring='r2')
grid_search_svr.fit(X_train_scaled, y_train)

best_params_svr = grid_search_svr.best_params_

# GridSearchCV refits the winning configuration on the full training set
# (refit=True is the default), so reuse that model instead of training an
# identical SVR a second time.
svr_best = grid_search_svr.best_estimator_

# Evaluate on the held-out test set.
y_pred_svr = svr_best.predict(X_test_scaled)
r2_svr = r2_score(y_test, y_pred_svr)
mse_svr = mean_squared_error(y_test, y_pred_svr)

print(f"SVR R² test score: {r2_svr:.2f}\nSVR test MSE: {mse_svr:.2f}")
print(best_params_svr)

SVR R² test score: 0.70
SVR test MSE: 0.22
{'C': 0.1, 'epsilon': 0.1, 'kernel': 'linear'}


In [4]:
from sklearn.ensemble import GradientBoostingRegressor

# Base estimator; the fixed random_state is carried over to every clone
# GridSearchCV creates, including the final refitted model.
gradient_boosting = GradientBoostingRegressor(random_state=42)

# Hyperparameter tuning for Gradient Boosting
param_grid_gb = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5],
    'max_depth': [3, 5, 7]
}

# 5-fold cross-validated grid search on the scaled training data, ranked by R².
grid_search_gb = GridSearchCV(estimator=gradient_boosting, param_grid=param_grid_gb, cv=5, n_jobs=-1, scoring='r2')
grid_search_gb.fit(X_train_scaled, y_train)

best_params_gb = grid_search_gb.best_params_

# GridSearchCV refits the winning configuration on the full training set
# (refit=True is the default), so reuse that model instead of training an
# identical regressor a second time.
gb_best = grid_search_gb.best_estimator_

# Evaluate on the held-out test set.
y_pred_gb = gb_best.predict(X_test_scaled)
r2_gb = r2_score(y_test, y_pred_gb)
mse_gb = mean_squared_error(y_test, y_pred_gb)

print(f"Gradient Boosting R² test score: {r2_gb:.2f}\nGradient Boosting MSE test: {mse_gb:.2f}")
print(best_params_gb)

Gradient Boosting R² test score: 0.56
Gradient Boosting MSE test: 0.32
{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}


In [5]:
from sklearn.linear_model import Ridge

# Base estimator; the grid below overrides alpha.
ridge_reg = Ridge()

# Hyperparameter tuning for Ridge Regression
param_grid_ridge = {
    'alpha': [0.1, 1, 10]
}

# 5-fold cross-validated grid search on the scaled training data, ranked by R².
grid_search_ridge = GridSearchCV(estimator=ridge_reg, param_grid=param_grid_ridge, cv=5, scoring='r2')
grid_search_ridge.fit(X_train_scaled, y_train)

best_params_ridge = grid_search_ridge.best_params_

# GridSearchCV refits the winning configuration on the full training set
# (refit=True is the default), so reuse that model instead of training an
# identical Ridge a second time.
ridge_best = grid_search_ridge.best_estimator_

# Evaluate on the held-out test set.
y_pred_ridge = ridge_best.predict(X_test_scaled)
r2_ridge = r2_score(y_test, y_pred_ridge)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
print(f"Ridge Regression R² test score: {r2_ridge:.2f}\nRidge Regression MSE test: {mse_ridge:.2f}")

Ridge Regression R² test score: 0.72
Ridge Regression MSE test: 0.21


In [6]:
from sklearn.linear_model import Lasso

# Initialize Lasso Regression; the grid below overrides alpha.
lasso_reg = Lasso()

# Hyperparameter tuning for Lasso Regression
# NOTE(review): the recorded run selected alpha=0.1, the smallest value in
# this grid -- consider extending the grid downward; TODO confirm.
param_grid_lasso = {
    'alpha': [0.1, 1, 10]
}

# 5-fold cross-validated grid search on the scaled training data, ranked by R².
grid_search_lasso = GridSearchCV(estimator=lasso_reg, param_grid=param_grid_lasso, cv=5, scoring='r2')
grid_search_lasso.fit(X_train_scaled, y_train)

best_params_lasso = grid_search_lasso.best_params_

# GridSearchCV refits the winning configuration on the full training set
# (refit=True is the default), so reuse that model instead of training an
# identical Lasso a second time.
lasso_best = grid_search_lasso.best_estimator_

# Evaluate on the held-out test set.
y_pred_lasso = lasso_best.predict(X_test_scaled)
r2_lasso = r2_score(y_test, y_pred_lasso)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)

print(f"Lasso Regression R² test score: {r2_lasso:.2f}")
print(f"Lasso Regression MSE test: {mse_lasso:.2f}")
print(best_params_lasso)

Lasso Regression R² test score: 0.80
Lasso Regression MSE test: 0.15
{'alpha': 0.1}


In [7]:
from sklearn.linear_model import ElasticNet

# Base estimator; the grid below overrides alpha and l1_ratio.
elastic_net = ElasticNet()

# Hyperparameter tuning for ElasticNet Regression
# NOTE(review): the recorded run selected the smallest alpha (0.1) and the
# smallest l1_ratio (0.3) in this grid -- consider extending both ranges
# downward; TODO confirm.
param_grid_en = {
    'alpha': [0.1, 1, 10],
    'l1_ratio': [0.3, 0.5, 0.7]
}

# 5-fold cross-validated grid search on the scaled training data, ranked by R².
grid_search_en = GridSearchCV(estimator=elastic_net, param_grid=param_grid_en, cv=5, scoring='r2')
grid_search_en.fit(X_train_scaled, y_train)

best_params_en = grid_search_en.best_params_

# GridSearchCV refits the winning configuration on the full training set
# (refit=True is the default), so reuse that model instead of training an
# identical ElasticNet a second time.
en_best = grid_search_en.best_estimator_

# Evaluate on the held-out test set.
y_pred_en = en_best.predict(X_test_scaled)
r2_en = r2_score(y_test, y_pred_en)
mse_en = mean_squared_error(y_test, y_pred_en)

print(f"ElasticNet Regression R² test score: {r2_en:.2f}")
print(f"ElasticNet Regression MSE test: {mse_en:.2f}")
print(best_params_en)

ElasticNet Regression R² test score: 0.90
ElasticNet Regression MSE test: 0.08
{'alpha': 0.1, 'l1_ratio': 0.3}


In [8]:
import xgboost as xgb

# Base estimator; objective and random_state are carried over to every clone
# GridSearchCV creates, including the final refitted model.
xgb_reg = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)

# Hyperparameter tuning for XgBoost Regression
param_grid_xgb = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5],
    'max_depth': [3, 5, 7],
    'gamma': [0, 0.1, 0.2]
}

# 5-fold cross-validated grid search on the scaled training data, ranked by R².
grid_search_xgb = GridSearchCV(estimator=xgb_reg, param_grid=param_grid_xgb, cv=5, n_jobs=-1, scoring='r2')
grid_search_xgb.fit(X_train_scaled, y_train)

best_params_xgb = grid_search_xgb.best_params_

# GridSearchCV refits the winning configuration on the full training set
# (refit=True is the default), so reuse that model instead of training an
# identical regressor a second time.
xgb_best = grid_search_xgb.best_estimator_

# Evaluate on the held-out test set.
y_pred_xgb = xgb_best.predict(X_test_scaled)
r2_xgb = r2_score(y_test, y_pred_xgb)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)

print(f"XGBoost R² test score: {r2_xgb:.2f}")
print(f"XGBoost MSE test: {mse_xgb:.2f}")
print(best_params_xgb)

XGBoost R² test score: 0.56
XGBoost MSE test: 0.33
{'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
