In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import torch


In [3]:
raw_data_A = pd.read_csv('data/data_A.csv')
raw_data_B = pd.read_csv('data/data_B.csv')
raw_data_C = pd.read_csv('data/data_C.csv')

In [4]:
data_A = raw_data_A.select_dtypes(include=[np.number])
data_B = raw_data_B.select_dtypes(include=[np.number])
data_C = raw_data_C.select_dtypes(include=[np.number])

In [5]:
scaler_1 = MinMaxScaler()
scaler_2 = MinMaxScaler()
scaler_3 = MinMaxScaler()

In [6]:
split_up_index = int(len(data_A)*0.8)

In [7]:
scaled_train_data_A = scaler_1.fit_transform(data_A[:split_up_index].values)
scaled_train_data_B = scaler_2.fit_transform(data_B[:split_up_index].values)
scaled_train_data_C = scaler_3.fit_transform(data_C[:split_up_index].values)

In [8]:
scaled_test_data_A = scaler_1.transform(data_A[split_up_index:].values)
scaled_test_data_B = scaler_2.transform(data_B[split_up_index:].values)
scaled_test_data_C = scaler_3.transform(data_C[split_up_index:].values)

In [66]:
def create_sequences_torch(data_A, data_B, data_C, min_length=12):
    output_seq = []
    target = []
    
    for x in range(min_length, len(data_A) - 1):
        inner_seq = torch.tensor(np.column_stack((
            data_A[:x], 
            data_B[:x], 
            data_C[:x]
        )), dtype=torch.float32)
        
        output_seq.append(inner_seq)
        target.append(data_C[x + 1])  

    # Pad sequences
    padded_seq = torch.nn.utils.rnn.pad_sequence(output_seq, batch_first=True, padding_value=0)

    return padded_seq, torch.tensor(target, dtype=torch.float32)

In [67]:
train_seq, train_targets = create_sequences_torch(scaled_train_data_A, scaled_train_data_B, scaled_train_data_C)
test_seq, test_targets = create_sequences_torch(scaled_test_data_A, scaled_test_data_B, scaled_test_data_C)

In [68]:
train_seq[1]

tensor([[0.1404, 0.0000, 0.0126],
        [0.0084, 0.0244, 0.0026],
        [0.0063, 0.0461, 0.0192],
        [0.0000, 0.0562, 0.0372],
        [0.0152, 0.0612, 0.0240],
        [0.0347, 0.0690, 0.0415],
        [0.0363, 0.0701, 0.0292],
        [0.1951, 0.0772, 0.0220],
        [0.1777, 0.0446, 0.0177],
        [0.0641, 0.0282, 0.0609],
        [0.0626, 0.0448, 0.2162],
        [0.3375, 0.2251, 0.7578],
        [0.2497, 0.0114, 0.0063],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.000

In [69]:
from torch.utils.data import DataLoader, Dataset

# Fix the CustomDataset class
class CustomDataset(Dataset):
    def __init__(self, input_seq, targets):
        self.input_seq = input_seq
        self.targets = targets

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        return self.input_seq[idx], self.targets[idx]

# Create datasets and dataloaders
train_set = CustomDataset(train_seq, train_targets)
test_set = CustomDataset(test_seq, test_targets)

train_loader = DataLoader(train_set, batch_size=64, shuffle=False)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False) 

In [61]:
from torch import nn
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, dropout, num_layers):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0,
            bidirectional=True
        )
        
        self.linear = nn.Linear(hidden_size * 2, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, lengths):  # Added lengths parameter
        packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        packed_out, (hn, cn) = self.lstm(packed)
        last_hidden = torch.cat((hn[-2], hn[-1]), dim=1)
        out = self.linear(last_hidden)
        return out.squeeze(-1)

In [64]:
# Corrected Training Function

input_size = 3 
hidden_size = 128
num_layers = 2
dropout = 0.2
learning_rate = 0.001
num_epochs = 50

# Create model
model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    dropout=dropout
)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Modified training function
def train_model(model, train_loader, criterion, optimizer, num_epochs=50):
    model.train()
    train_losses = []
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        
        for padded_sequences, targets, lengths in train_loader:  # Added lengths
            padded_sequences = padded_sequences.to(device)
            targets = targets.to(device)
            
            optimizer.zero_grad()
            outputs = model(padded_sequences, lengths)  # Pass lengths
            loss = criterion(outputs, targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Gradient clipping
            optimizer.step()
            
            running_loss += loss.item()
            
        epoch_loss = running_loss / len(train_loader)
        train_losses.append(epoch_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')
    
    return train_losses

# Corrected Evaluation Function
def evaluate_model(model, test_loader, criterion):
    model.eval()
    test_loss = 0.0
    predictions = []
    actuals = []
    
    with torch.no_grad():
        for padded_sequences, targets, lengths in test_loader:  # Added lengths
            padded_sequences = padded_sequences.to(device)
            targets = targets.to(device)
            
            outputs = model(padded_sequences, lengths)  # Pass lengths
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            
            predictions.extend(outputs.cpu().numpy())
            actuals.extend(targets.cpu().numpy())
    
    avg_test_loss = test_loss / len(test_loader)
    print(f'Test Loss: {avg_test_loss:.4f}')
    return predictions, actuals


def plot_results(predictions, actuals):
    """Plot the predicted vs actual values"""
    plt.figure(figsize=(12, 6))
    plt.plot(actuals, label='Actual Values')
    plt.plot(predictions, label='Predicted Values')
    plt.title('Time Series Prediction')
    plt.xlabel('Time Steps')
    plt.ylabel('Value')
    plt.legend()
    plt.grid(True)
    plt.show()

In [65]:

from torch import optim

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
train_losses = train_model(model, train_loader, criterion, optimizer, num_epochs)

# Plot training loss
plt.figure(figsize=(10, 5))
plt.plot(train_losses)
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.show()

# Evaluate the model
predictions, actuals = evaluate_model(model, test_loader, criterion)

# Plot results
plot_results(predictions, actuals)

ValueError: not enough values to unpack (expected 3, got 2)