In [10]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [11]:
# Load the pre-cleaned train / test splits from their CSV files
train_csv_path = "data/train_data.csv"
test_csv_path = "data/test_data.csv"

training_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)

In [12]:
# Column names: the model inputs and the single regression target
features = ["Plant_Production_GWh", "Population_k", "tmax"]
target = ["Max_Demand_GW"]

# Slice both splits with the same column lists. `target` is a list, so the
# y_* selections are single-column DataFrames rather than Series.
x_train, y_train = training_data[features], training_data[target]
x_test, y_test = test_data[features], test_data[target]

In [None]:
# Fit a linear regression on the training split.
# `fit` returns the estimator itself, so construction and fitting are chained.
linear_regression = LinearRegression().fit(x_train, y_train)
linear_regression  # bare last expression keeps the estimator as the cell output

In [None]:
# Predict the target for every row of the test features
y_test_prediction = linear_regression.predict(x_test)

# Score the predictions against the test targets, reporting each metric
# as soon as it is computed
mse_test = mean_squared_error(y_true=y_test, y_pred=y_test_prediction)
print(f"Mean Squared Error on Test Set: {mse_test}")

mae_test = mean_absolute_error(y_true=y_test, y_pred=y_test_prediction)
print(f"Mean Absolute Error on Test Set: {mae_test}")

In [None]:
# Reference line y = x: a perfect model would put every point on it.
# The line spans the observed range of the actual test values.
x_ideal = np.linspace(y_test.min(), y_test.max(), 100)
y_ideal = x_ideal

# Scatter of actual vs predicted values for the test split
fig, ax = plt.subplots()
ax.scatter(y_test, y_test_prediction, label="Actual vs Predicted")
ax.plot(x_ideal, y_ideal, color="red", label="Ideal Line", linestyle="--")
# The plotted values are y_test and the predictions made from x_test, so the
# title names the test dataset (it previously said "Validation Dataset").
ax.set_title("Actual vs Predicted Values on Test Dataset")
ax.set_xlabel("Actual Values")
ax.set_ylabel("Predicted Values")
ax.legend()
plt.show()

In [None]:
# MAPE helper used to score the predictions on the test dataset
def mean_absolute_percentage_error(actual, predictions):
    """Return the mean absolute percentage error (MAPE), in percent.

    Computes ``mean(|actual - predictions| / |actual|) * 100``.

    A single-column 2-D input (shape ``(n, 1)``, e.g. a one-column DataFrame)
    paired with a 1-D input (shape ``(n,)``) is flattened first. Without that,
    NumPy broadcasting would expand the subtraction to an ``(n, n)`` matrix and
    silently return a wrong score.

    Parameters
    ----------
    actual : array-like
        Ground-truth values. A zero entry yields an ``inf``/``nan`` term,
        following NumPy's floating-point division rules.
    predictions : array-like
        Predicted values, aligned element-wise with ``actual``.

    Returns
    -------
    float
        The MAPE expressed as a percentage.

    Raises
    ------
    ValueError
        Raised by NumPy when the two inputs cannot be broadcast together
        (for example 1-D inputs of different lengths).
    """
    # Coerce to float so integer (especially unsigned) inputs cannot wrap
    # around during the subtraction.
    actual = np.asarray(actual, dtype=float)
    predictions = np.asarray(predictions, dtype=float)

    # Align a column vector with a flat vector so values pair up one-to-one.
    if actual.ndim == 2 and actual.shape[1] == 1 and predictions.ndim == 1:
        actual = actual.ravel()
    elif predictions.ndim == 2 and predictions.shape[1] == 1 and actual.ndim == 1:
        predictions = predictions.ravel()

    return np.mean(np.abs((actual - predictions) / actual)) * 100

# Report the MAPE of the test-set predictions, formatted to two decimals
mape = mean_absolute_percentage_error(
    actual=y_test,
    predictions=y_test_prediction,
)
print(f'\nMAPE on test dataset: {mape:.2f}%')

In [None]:
# Plot the true and predicted maximum demand against the Date column of the
# test data, using the explicit Axes interface.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_data["Date"], test_data["Max_Demand_GW"], color="blue", label="True Values")
ax.plot(test_data["Date"], y_test_prediction, color="red", label="Predictions")
# Rotate the x tick labels so they do not overlap
ax.tick_params(axis="x", labelrotation=90)
# Title added so the figure can be understood on its own
ax.set_title("True vs Predicted Max_Demand_GW on Test Dataset")
ax.set_xlabel("Date")
ax.set_ylabel("Max_Demand_GW")
ax.legend()
plt.show()