In [9]:
import pickle
import time
from pathlib import Path

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

from functions import splitting, evaluate_model

In [10]:
# Obtain the train/test partitions from the project helper, using its 'date' mode.
X_train, X_test, y_train, y_test = splitting('date')

# Flatten both target arrays to 1-D in a single step
# (the targets are assumed to come back as 2-D arrays from the helper).
y_train, y_test = np.ravel(y_train), np.ravel(y_test)

0    2014010101
1    2014010102
2    2014010103
3    2014010104
4    2014010105
Name: datetime, dtype: int64


In [11]:
# Hyperparameter search space: 2 * 6 * 3 * 3 = 108 candidate combinations.
param_grid = dict(
    # Strategy used to choose the split at each node
    splitter=['best', 'random'],
    # Maximum depth of the tree (None leaves the depth unbounded)
    max_depth=[None, 10, 20, 30, 40, 50],
    # Minimum number of samples required to split an internal node
    min_samples_split=[2, 5, 10],
    # Minimum number of samples required at a leaf node
    min_samples_leaf=[1, 2, 4],
)

# Base decision tree regressor; the fixed seed keeps runs repeatable.
dt_model = DecisionTreeRegressor(random_state=42)

In [12]:
# Directory that receives the pickled model and the timing report.
# NOTE(review): this absolute, machine-specific location is kept from the original so
# artifacts land in the same place; consider deriving it from a configurable project root.
MODEL_DIR = Path('/Users/sahilnakrani/Documents/weather forecast/src/Machine-Learning/Regression-Models/trained_models/DecisionTreeRegression/with_only_date')
# Create the directory up front (outside the timed region) so a missing folder
# cannot fail the save step after the whole search has already run.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# perf_counter() is monotonic, so the measured durations are immune to
# system clock adjustments that can distort time.time() differences.
total_time_start = time.perf_counter()

# Exhaustive 5-fold cross-validated search over param_grid, scored by negative MSE,
# using all available CPU cores.
grid_search_dt = GridSearchCV(dt_model, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search_dt.fit(X_train, y_train)

# Best hyperparameter combination found by the search
best_params_dt = grid_search_dt.best_params_

# Train a fresh tree with the winning hyperparameters; this fit is timed on its
# own so the report can separate plain training cost from the search cost.
start_time = time.perf_counter()
dt_model_best = DecisionTreeRegressor(**best_params_dt, random_state=42)
dt_model_best.fit(X_train, y_train)
end_time = time.perf_counter()

training_time = end_time - start_time      # final fit only
total_time = end_time - total_time_start   # grid search + final fit

# Save the trained model to a .pickle file
with open(MODEL_DIR / 'DecisionTreeRegressor.pkl', 'wb') as f:
    pickle.dump(dt_model_best, f)

# Save training times to a text file
with open(MODEL_DIR / 'model_training_time.txt', 'w', encoding='utf-8') as f:
    f.write("DecisionTreeRegressor Training Time : {:.6f} seconds\n".format(training_time))
    f.write("Total Time with GridSearchCV : {:.6f} seconds\n".format(total_time))


In [13]:
# Predict the test-split targets with the tuned tree.
y_pred_dt_best = dt_model_best.predict(X_test)

# Report the tuned model's test-split metrics through the project helper.
# NOTE(review): the recorded run below shows a slightly negative R2 (about -0.017),
# i.e. no better than always predicting the mean -- worth confirming whether the
# 'date' features carry any usable signal for this target.
evaluate_model(
    dt_model_best,
    X_test,
    y_test,
    'DecisionTreeRegression',
    'date',
)

Model: DecisionTreeRegression
Mean Squared Error: 51.917398526496136
Root Mean Squared Error: 7.205372892952601
R2 Score: -0.016690228211015734
