In [31]:
from rfe import df_selected
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam



In [19]:
X = df_selected.drop(columns=['Close'])
y = df_selected['Close']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_array = X_train.values
X_test_array= X_test.values
y_train_array= y_train.values
y_test_array = y_test.values

In [20]:
X_train_tensor = torch.tensor(X_train_array, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_array, dtype=torch.float32)

# Convert test NumPy arrays to PyTorch tensors
X_test_tensor = torch.tensor(X_test_array, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_array, dtype=torch.float32)

In [21]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

In [34]:
train_loader = DataLoader(train_dataset, batch_size=3, shuffle=True)

# Create DataLoader for test set
test_loader = DataLoader(test_dataset, batch_size=3, shuffle=False)

In [35]:
X_train_batch, y_train_batch = next(iter(train_loader))

# Load a batch of data from test_loader
X_test_batch, y_test_batch = next(iter(test_loader))
print("Training batch - Features shape:", X_train_batch.shape)
print("Training batch - Target shape:", y_train_batch.shape)
print("Test batch - Features shape:", X_test_batch.shape)
print("Test batch - Target shape:", y_test_batch.shape)



Training batch - Features shape: torch.Size([3, 10])
Training batch - Target shape: torch.Size([3])
Test batch - Features shape: torch.Size([3, 10])
Test batch - Target shape: torch.Size([3])


In [40]:
from torch import nn
import torch

class ShallowRegressionLSTM(nn.Module):
    def __init__(self, input_size, hidden_units):
        super().__init__()
        self.input_size = input_size
        self.hidden_units = hidden_units
        self.num_layers = 1

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers
        )

        self.linear = nn.Linear(in_features=self.hidden_units, out_features=1)

    def forward(self, x):
      batch_size = x.size(0)
      h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).to(x.device)
      c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).to(x.device)

      # Ensure input has the correct shape
      x = x.unsqueeze(1)  # Add a singleton dimension for the sequence length

      _, (hn, _) = self.lstm(x, (h0, c0))
      out = self.linear(hn[-1]).squeeze()

      return out
 


In [41]:
learning_rate = 5e-4
num_hidden_units = 8

# Initialize the model
model = ShallowRegressionLSTM(input_size=10, hidden_units=num_hidden_units)

# Define the loss function
loss_function = nn.MSELoss()

# Define the optimizer
optimizer = Adam(model.parameters(), lr=learning_rate)

In [42]:
def train_model(train_loader, model, loss_function, optimizer):
    num_batches = len(train_loader)
    total_loss = 0
    model.train()

    for X, y in train_loader:
        output = model(X)
        loss = loss_function(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / num_batches
    print(f"Train loss: {avg_loss}")

def test_model(test_loader, model, loss_function):

    num_batches = len(test_loader)
    total_loss = 0

    model.eval()
    with torch.no_grad():
        for X, y in test_loader:
            output = model(X)
            total_loss += loss_function(output, y).item()

    avg_loss = total_loss / num_batches
    print(f"Test loss: {avg_loss}")


print("Untrained test\n--------")
test_model(test_loader, model, loss_function)

for ix_epoch in range(30):
    print(f"Epoch {ix_epoch}\n---------")
    train_model(train_loader, model, loss_function, optimizer=optimizer)
    test_model(test_loader, model, loss_function)

Untrained test
--------
Test loss: 182676.26132150425

Epoch 0
---------


  return F.mse_loss(input, target, reduction=self.reduction)


Train loss: 202940.2069313033
Test loss: 182255.7177899894

Epoch 1
---------
Train loss: 202486.65194396322
Test loss: 181828.393819518

Epoch 2
---------
Train loss: 202033.51699093817
Test loss: 181405.54834811972

Epoch 3
---------
Train loss: 201581.35600179905
Test loss: 180982.58481197033

Epoch 4
---------
Train loss: 201129.82877048908
Test loss: 180560.48809917903

Epoch 5
---------
Train loss: 200680.07486007462
Test loss: 180140.39979475635

Epoch 6
---------
Train loss: 200231.7319013193
Test loss: 179720.92919094278

Epoch 7
---------
Train loss: 199784.11470965485
Test loss: 179302.27289128708

Epoch 8
---------
Train loss: 199336.73258012393
Test loss: 178883.14017809852

Epoch 9
---------
Train loss: 198888.68701692432
Test loss: 178463.56586003708

Epoch 10
---------
Train loss: 198440.85835054636
Test loss: 178044.83754303496

Epoch 11
---------
Train loss: 197993.99447378065
Test loss: 177626.0009600106

Epoch 12
---------
Train loss: 197547.62949760127
Test loss: 1

In [62]:
import torch
import pandas as pd

def generate_predictions(data_loader, model, target_column):
    predictions = []
    
    model.eval()
    
    with torch.no_grad():
        for X, _ in data_loader:
            y_pred = model(X)
            if y_pred.numel() > 0:  # Check if y_pred is not empty
                predictions.append(y_pred)
    
    if predictions:  # Check if predictions list is not empty
        print(predictions)
        predictions = torch.cat(predictions, dim=0)
    else:
        predictions = torch.tensor([])  # Handle case where no predictions were made
    
    return predictions


def create_prediction_dataframe(train_loader, test_loader, model, target_column):
    train_predictions = generate_predictions(train_loader, model, target_column)
    test_predictions = generate_predictions(test_loader, model, target_column)
    
    print("Train predictions shape:", train_predictions.shape)
    print("Test predictions shape:", test_predictions.shape)
    
    train_data = train_loader.dataset.data
    test_data = test_loader.dataset.data
    
    print("Train data shape:", train_data.shape)
    print("Test data shape:", test_data.shape)
    
    train_data[target_column + "_predicted"] = train_predictions.numpy()
    test_data[target_column + "_predicted"] = test_predictions.numpy()
    
    combined_data = pd.concat([train_data, test_data], axis=0)
    
    return combined_data

# Call the function and check the results
combined_data = create_prediction_dataframe(train_loader, test_loader, model, 'pred_value')
print("Combined data shape:", combined_data.shape)



[tensor([16.8160, 16.8160,  9.1840]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 11.7846]), tensor([16.8160, 11.2422, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([11.7846, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160,  9.1840, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), tensor([16.8160, 16.8160, 16.8160]), 

RuntimeError: zero-dimensional tensor (at position 117) cannot be concatenated