In [1]:
import pickle
import time
from pathlib import Path

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

from functions import splitting, evaluate_model

In [2]:
# Obtain the train/test partitions from the project-local `splitting` helper.
# NOTE(review): the 'date' argument presumably selects the date-only feature
# set; the helper also appears to print the head of a `datetime` series as a
# side effect (see the cell output) -- confirm against functions.splitting.
X_train, X_test, y_train, y_test = splitting('date')

0    2014010101
1    2014010102
2    2014010103
3    2014010104
4    2014010105
Name: datetime, dtype: int64


In [3]:
# Search space for the grid search: only the polynomial degree is tuned.
# The key uses scikit-learn's `<step name>__<parameter>` convention, where the
# step name is the one make_pipeline derives from the PolynomialFeatures class.
param_grid_poly = dict(polynomialfeatures__degree=[1, 2, 3])  # Adjust degrees as needed

# Two-stage estimator: expand the inputs into polynomial terms, then fit an
# ordinary least-squares regression on the expanded features.
poly_model = make_pipeline(
    PolynomialFeatures(),
    LinearRegression(),
)

In [4]:
# Tune the polynomial degree with cross-validation, refit a pipeline with the
# winning degree, and persist both the fitted model and the timing figures.

# Single definition of the artifact directory (previously duplicated in two
# string literals). Creating it up front prevents a FileNotFoundError from
# open() when the directory tree does not exist yet.
# NOTE(review): this is a machine-specific absolute path; consider moving it to
# a configurable constant so the notebook runs on other machines.
output_dir = Path('/Users/sahilnakrani/Documents/weather forecast/src/Machine-Learning/Regression-Models/trained_models/PolynomialRegression/with_only_date')
output_dir.mkdir(parents=True, exist_ok=True)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
total_time_start = time.perf_counter()

# Perform grid search cross-validation (5 folds, scored by negative MSE)
grid_search_poly = GridSearchCV(poly_model, param_grid_poly, cv=5, scoring='neg_mean_squared_error')
grid_search_poly.fit(X_train, y_train)

# Get the best polynomial degree
best_degree_poly = grid_search_poly.best_params_['polynomialfeatures__degree']

# A fresh pipeline is fitted (instead of reusing the search's own refit) so
# that the cost of a single training run can be timed in isolation.
start_time = time.perf_counter()
# Initialize the polynomial regression model with the best degree
poly_model_best = make_pipeline(PolynomialFeatures(degree=best_degree_poly), LinearRegression())
poly_model_best.fit(X_train, y_train)

end_time = time.perf_counter()
training_time = end_time - start_time        # final fit only
total_time = end_time - total_time_start     # grid search + final fit

# Save the trained model to a .pickle file
with open(output_dir / 'PolynomialRegressionModel.pkl', 'wb') as f:
    pickle.dump(poly_model_best, f)

# Save training times to a text file
with open(output_dir / 'model_training_time.txt', 'w') as f:
    f.write("PolynomialRegression Training Time : {:.6f} seconds\n".format(training_time))
    f.write("Total Time with GridSearchCV : {:.6f} seconds\n".format(total_time))

In [5]:
# Make predictions on the testing data
# NOTE(review): y_pred is not referenced again in this cell; evaluate_model is
# handed the model and X_test, so it presumably predicts internally -- confirm
# before removing this line, since later cells may still read y_pred.
y_pred = poly_model_best.predict(X_test)
# Evaluate the model's performance on the held-out test split.
# The recorded output of this cell reports an R2 score of about -0.012.
evaluate_model(poly_model_best, X_test, y_test, 'PolynomialRegression', 'date')

Model: PolynomialRegression
Mean Squared Error: 51.65909102182787
Root Mean Squared Error: 7.187425896788632
R2 Score: -0.011631833080994358
