In [5]:
import pickle
import time
from pathlib import Path

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

from functions import splitting, evaluate_model

In [9]:
# Obtain the train/test partitions from the project's `splitting` helper
# (called with the 'date' option).
X_train, X_test, y_train, y_test = splitting('date')

# Flatten both target arrays to 1-D; the targets are assumed to arrive as
# 2-D arrays, and a 1-D `y` is what gets passed to `fit` below.
y_train, y_test = (np.ravel(target) for target in (y_train, y_test))

0    2014010101
1    2014010102
2    2014010103
3    2014010104
4    2014010105
Name: datetime, dtype: int64


In [10]:
# Base estimator: scikit-learn's GradientBoostingRegressor with a fixed seed.
# NOTE: despite the `xg_` prefix used for names in this notebook, this is not
# the XGBoost library.
xg_model = GradientBoostingRegressor(random_state=42)

# Hyperparameter grid for the gradient boosting regressor
# (3 values for each of 5 parameters -> 243 candidate combinations).
param_grid = dict(
    # Number of boosting stages (trees) to be used in the ensemble
    n_estimators=[50, 100, 150],
    # Learning rate shrinks the contribution of each tree
    learning_rate=[0.01, 0.05, 0.1],
    # Maximum depth of the individual trees
    max_depth=[3, 4, 5],
    # Minimum number of samples required to split an internal node
    min_samples_split=[2, 5, 10],
    # Minimum number of samples required to be at a leaf node
    min_samples_leaf=[1, 2, 4],
)

In [11]:
# Directory that receives the pickled model and the timing report.
# NOTE(review): this is a machine-specific absolute path, kept unchanged so
# existing artifacts stay where they are; consider deriving it from a
# configurable project root instead.
output_dir = Path('/Users/sahilnakrani/Documents/weather forecast/src/Machine-Learning/Regression-Models/trained_models/GradientBoostRegression/with_only_date')

# Create the directory *before* the expensive search, so a missing or
# unwritable location fails immediately rather than after all fits are done.
output_dir.mkdir(parents=True, exist_ok=True)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; unlike time.time() it is unaffected by system clock changes.
total_time_start = time.perf_counter()

# Perform grid search cross-validation (5-fold, scored by negative MSE,
# using all available cores).
grid_search_xg = GridSearchCV(xg_model, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search_xg.fit(X_train, y_train)

# Get the best hyperparameters
best_params_xg = grid_search_xg.best_params_

start_time = time.perf_counter()
# Re-create the gradient boosting regressor with the best hyperparameters and
# fit it on the full training set; this fit is timed on its own.
xg_model_best = GradientBoostingRegressor(**best_params_xg, random_state=42)
xg_model_best.fit(X_train, y_train)

end_time = time.perf_counter()
training_time = end_time - start_time      # final fit only
total_time = end_time - total_time_start   # grid search + final fit

# Save the trained model to a .pickle file
with open(output_dir / 'GradientBoostRegressor.pkl', 'wb') as f:
    pickle.dump(xg_model_best, f)

# Save training times to a text file
with open(output_dir / 'model_training_time.txt', 'w') as f:
    f.write("GradientBoostRegressor Training Time : {:.6f} seconds\n".format(training_time))
    f.write("Total Time with GridSearchCV : {:.6f} seconds\n".format(total_time))


In [12]:
# Predict on the held-out test features with the tuned model.
y_pred_dt_best = xg_model_best.predict(X_test)

# Evaluate the model's performance through the project's `evaluate_model`
# helper, which receives the model itself together with the test data.
evaluate_model(
    xg_model_best,
    X_test,
    y_test,
    'GradientBoostRegression',
    'date',
)

Model: GradientBoostRegression
Mean Squared Error: 51.08836768392284
Root Mean Squared Error: 7.147612726213056
R2 Score: -0.00045544795522811654
