In [42]:
#Without Grid Search
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from keras import models, layers
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import seaborn as sns
import numpy as np

# Load datasets
df_train = pd.read_csv('2015_2020.csv')
df_test = pd.read_csv('2022_2023.csv')
df_train.drop(["DateTime", "DayOfWeek"], axis=1, inplace=True)
df_test.drop(["DateTime", "DayOfWeek"], axis=1, inplace=True)

target_column = 'KCPL'

# Separate features and targets
features_train = df_train.drop(target_column, axis=1)
target_train = df_train[target_column]
features_test = df_test.drop(target_column, axis=1)
target_test = df_test[target_column]

# Normalize features
scaler = StandardScaler()
features_train_scaled = scaler.fit_transform(features_train)
features_test_scaled = scaler.transform(features_test)

# Train a linear regression model
linear_model = LinearRegression()
linear_model.fit(features_train_scaled, target_train)

# Extract parameters from the linear regression model
linear_intercept = linear_model.intercept_
linear_coefficients = linear_model.coef_

# Make predictions using linear regression model
linear_predictions = linear_model.predict(features_test_scaled)
# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(target_test, linear_predictions)
print('Mean Squared Error for Linear Regression: %s' % mse)
# Calculate Mean Absolute Error (MAE)
mae = mean_absolute_error(target_test, linear_predictions)
print('Mean Absolute Error for Linear Regression: %s' % mae)
# Print linear regression model parameters
print("\nLinear Regression Model Parameters:")
print("Intercept:", linear_intercept)
print("Coefficients:", linear_coefficients)

# # Plot predictions for the linear regression model
# plt.scatter(target_test, linear_predictions, alpha=0.5, label='Linear Regression Model')
# plt.xlabel('Actual Values')
# plt.ylabel('Predicted Values')
# plt.title('Actual vs Predicted Values')
# plt.legend()
# plt.show()



Mean Squared Error for Linear Regression: 208341.92906055416
Mean Absolute Error for Linear Regression: 366.1355607447715

Linear Regression Model Parameters:
Intercept: 1898.4042292464978
Coefficients: [ 2.00258250e+01 -1.00494213e+02 -3.65846640e+01 -6.61324928e+03
 -5.74774016e+02 -3.09320450e+01  6.74375783e+03  7.14210376e+01
 -1.50066626e-11 -6.01338497e+00  6.79859429e+01  1.04506768e+01]


In [2]:
import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,mean_absolute_error
import matplotlib.pyplot as plt

# Load datasets
df_train = pd.read_csv('./dataset/train/2015_2020.csv')
df_test = pd.read_csv('./dataset/test/2022_2023.csv')
df_train.drop(["DateTime", "DayOfWeek"], axis=1, inplace=True)
df_test.drop(["DateTime", "DayOfWeek"], axis=1, inplace=True)

target_column = 'KCPL'

# Separate features and targets
features_train = df_train.drop(target_column, axis=1)
target_train = df_train[target_column]
features_test = df_test.drop(target_column, axis=1)
target_test = df_test[target_column]

# Normalize features
scaler = StandardScaler()
features_train_scaled = scaler.fit_transform(features_train)
features_test_scaled = scaler.transform(features_test)

# Perform Grid Search
param_grid = {
    'fit_intercept': [True, False],
    'copy_X': [True, False],
    'n_jobs': [None, 1],
    'positive': [True, False]
}

grid_search = GridSearchCV(estimator=LinearRegression(), param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(features_train_scaled, target_train)

best_params = grid_search.best_params_
print('Best Hyperparameters for Linear Regression: %s' % best_params)

best_linear_model = LinearRegression(**best_params)
best_linear_model.fit(features_train_scaled, target_train)

joblib.dump(best_linear_model, "best-LR-model.pkl")

# Make predictions using the best linear regression model
y_pred_best = best_linear_model.predict(features_test_scaled)

# Extract parameters from the best linear regression model
linear_intercept_best = best_linear_model.intercept_
linear_coefficients_best = best_linear_model.coef_

mse_best = mean_squared_error(target_test, y_pred_best)
print('Mean Squared Error for Best Linear Regression Model: %s' % mse_best)

# Calculate Mean Absolute Error (MAE)
mae = mean_absolute_error(target_test, y_pred_best)
print('Mean Absolute Error for Linear Regression: %s' % mae)

# Print parameters of the best linear regression model
print("\nBest Linear Regression Model Parameters:")
print("Intercept:", linear_intercept_best)
print("Coefficients:", linear_coefficients_best)


Best Hyperparameters for Linear Regression: {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}
Mean Squared Error for Best Linear Regression Model: 208341.92906054974


NameError: name 'linear_predictions' is not defined

In [34]:
# #Grid Search
# import pandas as pd
# from sklearn.model_selection import GridSearchCV
# from sklearn.linear_model import LinearRegression
# from sklearn import preprocessing
# from sklearn.metrics import mean_squared_error

# train_data = pd.read_csv('2015_2020.csv')
# test_data = pd.read_csv('2022_2023.csv')
# #train_data.head()
# response = "KCPL"
# features = ["Year", "Quarter", "IsWeekend", "Month", "DayNumber_Year", "DayNumber_Week", "DayOfYear", "Hour", "Minute", "WeekNumber", "temperature_f", "weather_code"]
# x_train = train_data[features]
# y_train = train_data[response]
# x_test = test_data[features]
# y_test = test_data[response]

# x_train = preprocessing.normalize(x_train)
# x_test = preprocessing.normalize(x_test)

# linear_model = LinearRegression()
# linear_model.fit(x_train, y_train)
# y_pred = linear_model.predict(x_test)
# mse = mean_squared_error(y_test, y_pred)
# print('Mean Squared Error for Linear Regression: %s' % mse)

# param_grid = {
#     'fit_intercept': [True, False],
#     'copy_X': [True, False],
#     'n_jobs': [None, 1],
#     'positive': [True, False]
# }



# grid_search = GridSearchCV(estimator=LinearRegression(), param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
# grid_search.fit(x_train, y_train)

# best_params = grid_search.best_params_
# print('Best Hyperparameters for Linear Regression: %s' % best_params)

# best_linear_model = LinearRegression(**best_params)
# best_linear_model.fit(x_train, y_train)

# y_pred_best = best_linear_model.predict(x_test)

# # Extract parameters from the linear regression model
# linear_intercept = best_linear_model.intercept_
# linear_coefficients = best_linear_model.coef_

# mse_best = mean_squared_error(y_test, y_pred_best)
# print('Mean Squared Error for Best Linear Regression Model: %s' % mse_best)
# # Print linear regression model parameters
# print("\nLinear Regression Model Parameters:")
# print("Intercept:", linear_intercept)
# print("Coefficients:", linear_coefficients)


Mean Squared Error for Linear Regression: 203651.842386723
Best Hyperparameters for Linear Regression: {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}
Mean Squared Error for Best Linear Regression Model: 203651.842386723

Linear Regression Model Parameters:
Intercept: -106569.76836218817
Coefficients: [ 1.11193647e+05 -1.51132432e+05 -2.07129268e+05 -5.79072121e+06
 -1.93704982e+05 -3.18698389e+04  2.01792519e+05  1.63748635e+04
 -4.65661287e-10  1.77146790e+03 -2.41657799e+03  2.47501185e+03]


In [36]:
# import matplotlib.pyplot as plt

# # Assuming you have a column in your test_data for time or date
# time_column = "DateTime"  # Replace with the actual column name
# plt.figure(figsize=(12, 6))
# plt.plot(test_data[time_column], y_test, label='Actual', marker='o')
# plt.plot(test_data[time_column], y_pred_best, label='Best Linear Regression', marker='o')
# plt.title('Actual vs Predicted for Best Linear Regression Model')
# plt.xlabel('Time')
# plt.ylabel('Response Variable')
# plt.legend()
# plt.show()


In [37]:
# plt.scatter(y_test, y_pred)
# plt.xlabel('Actual Values')
# plt.ylabel('Predicted Values')
# plt.title('Actual vs Predicted Values')
# plt.show()



In [38]:
# #Wendy
# import pandas as pd
# from sklearn.preprocessing import StandardScaler
# from sklearn.linear_model import LinearRegression
# from sklearn.metrics import mean_absolute_error, mean_squared_error
# import matplotlib.pyplot as plt

# # Load datasets
# df_train = pd.read_csv('/content/2015_2020.csv')
# df_test = pd.read_csv('/content/2022_2023.csv')
# df_train.drop(["DateTime", "DayOfWeek"], axis=1, inplace=True)
# df_test.drop(["DateTime", "DayOfWeek"], axis=1, inplace=True)

# target_column = 'KCPL'

# # Separate features and targets
# features_train = df_train.drop(target_column, axis=1)
# target_train = df_train[target_column]
# features_test = df_test.drop(target_column, axis=1)
# target_test = df_test[target_column]

# # Normalize features
# scaler = StandardScaler()
# features_train_scaled = scaler.fit_transform(features_train)
# features_test_scaled = scaler.transform(features_test)

# def train_and_evaluate_linear_regression(features_train_scaled, target_train, features_test_scaled, target_test):
#     # Create and train the Linear Regression model
#     model = LinearRegression()
#     model.fit(features_train_scaled, target_train)

#     # Make predictions
#     train_predictions = model.predict(features_train_scaled)
#     test_predictions = model.predict(features_test_scaled)

#     # Evaluate the model
#     train_mae = mean_absolute_error(target_train, train_predictions)
#     test_mae = mean_absolute_error(target_test, test_predictions)
#     train_mse = mean_squared_error(target_train, train_predictions)
#     test_mse = mean_squared_error(target_test, test_predictions)

#     print(f"Train MSE: {train_mse}, Test MSE: {test_mse}")
#     print(f"Train MAE: {train_mae}, Test MAE: {test_mae}")


#     # Extract parameters from the linear regression model
#     linear_intercept = model.intercept_
#     linear_coefficients = model.coef_



#     # Print linear regression model parameters
#     print("\nLinear Regression Model Parameters:")
#     print("Intercept:", linear_intercept)
#     print("Coefficients:", linear_coefficients)



#     # Optionally, you can plot the predicted vs actual values
#     plt.scatter(target_test, test_predictions)
#     plt.xlabel('True Values')
#     plt.ylabel('Predictions')
#     plt.title('Linear Regression: True Values vs Predictions')
#     plt.show()



# # Train and evaluate the linear regression model
# train_and_evaluate_linear_regression(features_train_scaled, target_train, features_test_scaled, target_test)
