# Hyperparameter Tuning

In this notebook, we tune the hyperparameters of our selected models (a random forest regressor and a linear regression model) using grid search, with randomized search available as an alternative. Each hyperparameter configuration is scored by 5-fold cross-validated mean squared error to find the best model for predicting bike-sharing rental demand.

In [None]:
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Read the processed bike-sharing table, then separate the target column
# ('count') from the remaining columns, which are used as features.
# NOTE(review): every non-'count' column is passed to the models as-is --
# confirm the processed file holds only numeric, non-leaking features.
data = pd.read_csv('../data/processed/bike_sharing_data.csv')
y = data['count']
X = data.drop(columns='count')

# Candidate models and the hyperparameter grid to search for each one.
# Each entry maps a display name to:
#   'model'  -- an unfitted estimator instance
#   'params' -- the parameter grid handed to the search (may be empty)
models = {
    'RandomForest': {
        'model': RandomForestRegressor(),
        'params': {
            'n_estimators': [100, 200],
            # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3.
            # For RandomForestRegressor it meant "consider all features",
            # which is spelled 1.0 and is accepted by old and new releases.
            'max_features': [1.0, 'sqrt'],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 5, 10]
        }
    },
    'LinearRegression': {
        'model': LinearRegression(),
        # Nothing to tune: the empty grid means the search only
        # cross-validates the estimator with its default settings.
        'params': {}
    }
}

# Function to perform hyperparameter tuning
def tune_model(model, params, X, y, search_type='grid'):
    """Fit a cross-validated hyperparameter search and return it.

    Args:
        model: Unfitted estimator to tune.
        params: Parameter grid / distributions passed to the search.
        X: Feature data passed to ``fit``.
        y: Target values passed to ``fit``.
        search_type: ``'grid'`` runs ``GridSearchCV``; any other value runs
            ``RandomizedSearchCV`` with ``n_iter=10``.

    Returns:
        The fitted search object. Both search types use 5-fold
        cross-validation, ``n_jobs=-1`` and the ``neg_mean_squared_error``
        scorer.
    """
    # Options common to both search strategies.
    shared_options = {
        'n_jobs': -1,
        'cv': 5,
        'scoring': 'neg_mean_squared_error',
    }
    if search_type != 'grid':
        searcher = RandomizedSearchCV(model, params, n_iter=10, **shared_options)
    else:
        searcher = GridSearchCV(model, params, **shared_options)
    searcher.fit(X, y)
    return searcher

# Run a grid search for every configured model, keep each fitted search
# object under the model's name, and report its best parameters and score.
best_models = {}
for model_name, model_info in models.items():
    print(f'Tuning {model_name}...')
    best_model = tune_model(
        model_info['model'],
        model_info['params'],
        X,
        y,
        search_type='grid',
    )
    best_models[model_name] = best_model
    # The search is scored with negated MSE, so flip the sign to report
    # a positive error value.
    best_params, best_mse = best_model.best_params_, -best_model.best_score_
    print(f'Best parameters for {model_name}: {best_params}')
    print(f'Best score for {model_name}: {best_mse}')

# Persist the tuned results. Note that the objects stored in `best_models`
# are the fitted search objects returned by `tune_model`, not bare
# estimators, so that is what gets written to disk.
checkpoint_dir = '../models/checkpoints'
# joblib.dump does not create missing directories; make sure the target
# exists so a fresh checkout does not fail after the (slow) tuning step.
os.makedirs(checkpoint_dir, exist_ok=True)
joblib.dump(best_models['RandomForest'], f'{checkpoint_dir}/best_random_forest_model.pkl')
joblib.dump(best_models['LinearRegression'], f'{checkpoint_dir}/best_linear_regression_model.pkl')

print('Hyperparameter tuning completed and models saved.')