<a href="https://colab.research.google.com/github/Norawit29/resource_prediction/blob/main/02_model_training_ipynbipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Part 2. Model Training and Hyperparameter Tuning

In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from scipy.stats import randint, uniform
import pickle


# Model Training and Hyperparameter Tuning
# Candidate regressors, keyed by the short name that is reused below for the
# search-space lookup, the log messages and the pickle file names.
# Every estimator starts from its library defaults.
models = dict(
    LinearRegression=LinearRegression(),
    Ridge=Ridge(),
    Lasso=Lasso(),
    ElasticNet=ElasticNet(),
    RandomForest=RandomForestRegressor(),
    XGBoost=XGBRegressor(),
    LightGBM=LGBMRegressor(),
)

# Hyperparameter space for each algorithm
# Keys match the names in `models`; a model with no entry here
# (LinearRegression) is fitted without tuning.  Parameter names carry the
# `model__` prefix so they are routed to the pipeline step named 'model'.
# Lists are sampled uniformly from their elements.  scipy distributions:
#   randint(low, high)  -> integers in [low, high)
#   uniform(loc, scale) -> floats in [loc, loc + scale]
hyperparameter_space = {
    'Ridge': {
        'model__alpha': [0.1, 1, 10, 100, 1000]
    },
    'Lasso': {
        'model__alpha': [0.1, 1, 10, 100, 1000]
    },
    'ElasticNet': {
        'model__alpha': [0.1, 1, 10, 100, 1000],
        'model__l1_ratio': [0.1, 0.5, 0.9]
    },
    'RandomForest': {
        'model__n_estimators': randint(100, 1000),
        # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3; for
        # regressors it meant "use all features", which 1.0 expresses on both
        # old and new releases.
        'model__max_features': [1.0, 'sqrt'],
        'model__max_depth': randint(10, 100),
        'model__min_samples_split': randint(2, 10),
        'model__min_samples_leaf': randint(1, 4),
        'model__bootstrap': [True, False]
    },
    'XGBoost': {
        'model__n_estimators': randint(100, 1000),
        'model__learning_rate': uniform(0.01, 0.3),    # [0.01, 0.31]
        'model__max_depth': randint(3, 10),
        'model__min_child_weight': randint(1, 10),
        'model__subsample': uniform(0.6, 0.4),         # [0.6, 1.0]
        'model__colsample_bytree': uniform(0.5, 0.5)   # [0.5, 1.0]
    },
    'LightGBM': {
        'model__n_estimators': randint(100, 1000),
        'model__learning_rate': uniform(0.01, 0.3),    # [0.01, 0.31]
        'model__num_leaves': randint(20, 50),
        'model__max_depth': randint(3, 10),
        'model__min_child_samples': randint(10, 30),
        # NOTE(review): LightGBM appears to apply `subsample` only when
        # `subsample_freq` > 0 (default 0), so this entry may have no effect
        # as written -- confirm and consider adding 'model__subsample_freq'.
        'model__subsample': uniform(0.6, 0.4)          # [0.6, 1.0]
    }
}

# Training and tuning each model
# For every candidate estimator: chain it behind the shared preprocessor in a
# pipeline, tune it with a randomized search when a search space is defined
# (otherwise fit it with its default settings), keep the fitted pipeline in
# `best_models`, and pickle it to 'best_model_<name>.pkl'.
# NOTE(review): `Pipeline`, `RandomizedSearchCV`, `preprocessor`, `X_train`
# and `y_train` are not defined in this cell -- presumably they come from an
# earlier cell; confirm before running this cell in isolation.
best_models = {}
for model_name, model in models.items():
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('model', model)
    ])

    if model_name in hyperparameter_space:
        # Perform Randomized Search: 10 sampled candidates, 5-fold CV each.
        # The scorer is negated RMSE, so a score closer to zero is better.
        random_search = RandomizedSearchCV(
            pipeline,
            param_distributions=hyperparameter_space[model_name],
            n_iter=10,
            cv=5,
            verbose=2,
            random_state=42,
            n_jobs=-1,
            scoring='neg_root_mean_squared_error'
        )
        random_search.fit(X_train, y_train)
        best_models[model_name] = random_search.best_estimator_
        print(f"Best parameters for {model_name}: {random_search.best_params_}")
        print(f"Best score (negative RMSE) for {model_name}: {random_search.best_score_}")

    else:
        # Fit model without hyperparameter tuning
        pipeline.fit(X_train, y_train)
        best_models[model_name] = pipeline

    # Save the best model for each algorithm.  The fitted pipeline is read
    # back from `best_models`; the previous code referenced an undefined
    # name (`best_model`) here and raised NameError.
    with open(f'best_model_{model_name}.pkl', 'wb') as file:
        pickle.dump(best_models[model_name], file)
    print(f"Best model for {model_name} saved as 'best_model_{model_name}.pkl'")

