In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset, DataLoader
import sys
sys.path.append('../Models')
from models import TimeSeriesNeuralNetwork, LSTMTimeSeriesModel
import torch.optim as optim
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt


### Reading the merged dataframe

In [None]:
# Load the merged table from the Data directory (path is relative to this notebook).
final_table_path = '../Data/Final_table.csv'
data = pd.read_csv(final_table_path)

### Splitting the dataframe into test and training sets

In [None]:
# Positional feature columns: 1-23 and 25-28 (columns 0 and 24 are left out).
# NOTE(review): column 24 is presumably the target 'DPL_historical_da' — confirm against data.columns.
feature_positions = list(range(1, 24)) + list(range(25, 29))
feature_var = data.iloc[:, feature_positions]
target_var = data['DPL_historical_da']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    feature_var,
    target_var,
    test_size=0.2,
    random_state=156,
)

# Feature Selection

In [None]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif, f_regression  # f_classif kept importable for any other cell that uses it

# The target is continuous (it is later fitted with an MSE loss and scored with
# R^2), so rank features with the regression F-test. f_classif is the ANOVA
# F-test for class labels and would treat every distinct target value as its
# own class.
model_fc = SelectKBest(score_func=f_regression, k=4)

# Fit the selector on the training split only, so the held-out rows do not
# influence which features are kept.
feature_selector = model_fc.fit(X_train, y_train)
diabetes_new = feature_selector  # legacy name, still referenced by a later cell

# Check the selected features
feature_selector.get_feature_names_out()

In [None]:
# Boolean mask over feature_var's columns marking the features the selector kept.
selected_mask = diabetes_new.get_support()
optimized_features = feature_var.iloc[:, selected_mask]
optimized_features


In [None]:
# Display the target series (the bare last expression of a cell is rendered as its output).
target_var

In [None]:
# Display the column index of the loaded table, e.g. to check the positional indices used for feature_var.
data.columns

# Linear Regression

# Neural Network

## Helper Functions for Training, Evaluation, and Plotting

In [None]:
def train_model(model, train_loader, criterion, optimizer, device, num_epochs=100):
    """Train ``model`` on mini-batches and return the mean loss of every epoch.

    Args:
        model: Callable network; if it is an ``nn.Module`` it is switched to
            training mode before the first epoch.
        train_loader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        device: Device that each batch is moved to before the forward pass.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        list[float]: Mean batch loss for each epoch, in order.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    # Make sure layers such as dropout are active even if the model was left in
    # eval mode by an earlier evaluation cell.
    if isinstance(model, nn.Module):
        model.train()

    epoch_losses = []

    for epoch in range(num_epochs):
        batch_losses = []
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            # A target of shape (batch,) against an output of shape (batch, 1)
            # would be broadcast by the loss to (batch, batch) and silently
            # yield a wrong value, so align the target to the output first.
            if targets.shape != outputs.shape and targets.numel() == outputs.numel():
                targets = targets.reshape(outputs.shape)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())

        if not batch_losses:
            raise ValueError("train_loader yielded no batches; cannot compute an epoch loss")

        epoch_loss = sum(batch_losses) / len(batch_losses)
        epoch_losses.append(epoch_loss)
        print(f'Epoch {epoch+1}, Loss: {epoch_loss}')

    return epoch_losses

In [None]:
def plot_loss_over_epochs(epoch_losses):
    """Draw the training-loss curve and show the figure.

    Args:
        epoch_losses: Sequence of loss values, plotted against their index.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(epoch_losses, label='Training Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title('Training Loss Over Epochs')
    ax.legend()
    plt.show()

In [None]:
def test_model(model, X_test_tensor, y_test_tensor):
    with torch.no_grad():
 
        y_pred_tensor = model(X_test_tensor)

        y_pred = y_pred_tensor.cpu().numpy()
        y_true = y_test_tensor.cpu().numpy()

        print(f"predicted value: {y_pred.shape}", f"true value: {y_true.shape}")
    
    mse = mean_squared_error(y_true, y_pred)
    print(f"Mean Squared Error on the Test Set: {mse}")

    r2 = r2_score(y_true, y_pred)
    print(f"R^2 Score: {r2}")

    return y_pred, y_true, mse, r2


In [None]:
def plot_actual_predicted(y_true, y_pred):
    """Overlay actual and predicted values on one line chart and show it.

    Args:
        y_true: Actual values, plotted against their sample index.
        y_pred: Predicted values, drawn on top with partial transparency.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(y_true, label='Actual Values')
    ax.plot(y_pred, label='Predicted Values', alpha=0.75)
    ax.set(
        xlabel='Sample Index',
        ylabel='Value',
        title='Actual vs. Predicted Values',
    )
    ax.legend()
    plt.show()
    

In [None]:
def plot_distribution_error(errors):
    """Show a 25-bin histogram of the prediction errors.

    Args:
        errors: Array-like of error values to bin.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    ax.hist(errors, bins=25, edgecolor='k', alpha=0.7)
    ax.set_xlabel('Error Value')
    ax.set_ylabel('Count')
    ax.set_title('Distribution of Prediction Errors')
    plt.show()

## Convert the data to tensors

In [None]:
# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable across "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Convert the full feature table, the selected-feature table and the target
# to float32 tensors (in that order).
X_tensor, X_tensor_opti, y_tensor = (
    torch.tensor(table.to_numpy(), dtype=torch.float32)
    for table in (feature_var, optimized_features, target_var)
)

In [None]:
# Reuse the seed of the DataFrame split that the feature selector was fitted on
# (random_state=156). With the same row count, test_size and seed,
# train_test_split picks the same rows, so the selector never saw rows that end
# up in this test set. A different seed here would leak test rows into the
# feature-selection step.
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor_opti,
    y_tensor,
    test_size=0.2,
    random_state=156,
)

In [None]:
# Pair each feature row with its target, then serve shuffled mini-batches of 64.
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=64,
)


## First Network

In [None]:
# Build the first network with one input per selected feature column and a
# single output, then move it to the chosen device.
model = TimeSeriesNeuralNetwork(
    sequence_length=1,
    num_features=X_train.shape[1],
    output_size=1,
)
model = model.to(device)

In [None]:
# Mean-squared-error objective, optimised with Adam at a learning rate of 1e-3.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train the first network for 100 epochs and keep the per-epoch losses for plotting.
epoch_losses = train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=100,
)


In [None]:
# Visualise how the training loss of the first network evolved.
plot_loss_over_epochs(epoch_losses=epoch_losses)

In [None]:
# X_test / y_test are already tensors. torch.as_tensor reuses them (converting
# dtype/device only if needed), whereas torch.tensor(<tensor>) copy-constructs
# and emits a UserWarning.
X_test_tensor = torch.as_tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.as_tensor(y_test, dtype=torch.float32, device=device)

# Evaluate in inference mode.
model.eval()

y_pred, y_true, mse, r2 = test_model(model, X_test_tensor, y_test_tensor)

In [None]:
# Compare the first network's predictions with the actual test targets.
plot_actual_predicted(y_true=y_true, y_pred=y_pred)


In [None]:
# Flatten both arrays before subtracting: y_true is 1-D, and if y_pred comes
# back as a column of shape (N, 1), a plain subtraction would broadcast to an
# (N, N) matrix instead of N per-sample errors.
errors = y_true.reshape(-1) - y_pred.reshape(-1)
plot_distribution_error(errors)

## LSTM Model Parameters

In [None]:
# Hyper-parameters passed to the LSTM model constructed further down.
sequence_length, output_size = 1, 1
num_features = X_train.shape[1]  # one input per feature column
hidden_dim, num_layers = 50, 2
dropout_rate = 0.2



## Making Tensors 3-D

In [None]:
# Reshape the feature tensors to (samples, sequence_length, num_features).
lstm_input_shape = (-1, sequence_length, num_features)
X_train = X_train.view(*lstm_input_shape)
X_test = X_test.view(*lstm_input_shape)

In [None]:
# Rebuild the dataset and loader so batches carry the reshaped 3-D inputs.
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)

## Second Network

In [None]:
# Build the LSTM model from the hyper-parameters above and move it to the device.
# Arguments stay positional because the constructor's parameter names are not visible here.
model = LSTMTimeSeriesModel(
    sequence_length,
    num_features,
    output_size,
    hidden_dim,
    num_layers,
    dropout_rate,
)
model = model.to(device)

In [None]:
# Fresh loss and optimizer bound to the LSTM model's parameters.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train the LSTM model for 100 epochs and keep the per-epoch losses for plotting.
epoch_losses = train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=100,
)

In [None]:
# Visualise how the training loss of the LSTM model evolved.
plot_loss_over_epochs(epoch_losses=epoch_losses)

In [None]:
# X_test / y_test are already tensors. torch.as_tensor reuses them (converting
# dtype/device only if needed), whereas torch.tensor(<tensor>) copy-constructs
# and emits a UserWarning.
X_test_tensor = torch.as_tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.as_tensor(y_test, dtype=torch.float32, device=device)

# The model is still in training mode after construction and training; switch
# to eval mode so dropout (dropout_rate was passed to the model) is disabled
# while scoring the test set.
model.eval()

y_pred, y_true, mse, r2 = test_model(model, X_test_tensor, y_test_tensor)

In [None]:
# Compare the LSTM model's predictions with the actual test targets.
plot_actual_predicted(y_true=y_true, y_pred=y_pred)


In [None]:
# Flatten both arrays before subtracting: y_true is 1-D, and if y_pred comes
# back as a column of shape (N, 1), a plain subtraction would broadcast to an
# (N, N) matrix instead of N per-sample errors.
errors = y_true.reshape(-1) - y_pred.reshape(-1)
plot_distribution_error(errors)