In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [3]:
# Load the CSV and discard its final row (noted as containing NaN).
df = pd.read_csv("data.csv").iloc[:-1]

# Parse the date strings into datetimes, then remove every row that still
# has a missing value (e.g. the early rows where 'AI' is absent).
df = df.assign(date=pd.to_datetime(df['date'])).dropna()

# Column roles: 'boeing' is what we predict; every remaining non-date
# column is used as an input feature.
target_col = 'boeing'
feature_cols = df.columns.difference(['date', target_col])

# Use the date as the index for easier alignment.
df = df.set_index('date')


In [5]:
# Standardize target and features separately.
#
# The scalers are fitted on the leading 70% of rows only and then applied to
# every row. Fitting them on the whole frame would let the mean/variance of
# the later rows (the ones held out for testing) leak into the values the
# model is trained on.
# NOTE(review): 0.7 mirrors the train fraction of the positional train/test
# split performed later in this notebook — keep the two in sync. Rows are
# taken in file order; presumably that is chronological, confirm.
target_scaler = StandardScaler()
X_scaler = StandardScaler()

# At least one row is needed for the scalers to be fitted at all.
n_fit_rows = max(1, int(len(df) * 0.7))
target_scaler.fit(df[[target_col]].iloc[:n_fit_rows])
X_scaler.fit(df[feature_cols].iloc[:n_fit_rows])

y_scaled = target_scaler.transform(df[[target_col]])
X_scaled = X_scaler.transform(df[feature_cols])

# The target comes back as an (n_rows, 1) matrix; flatten it to 1-D.
y_scaled = y_scaled.flatten()


In [7]:
def create_sequences(X, y, seq_len):
    """Build sliding windows over ``X`` paired with the target that follows each.

    Window ``i`` is ``X[i:i + seq_len]`` and its target is ``y[i + seq_len]``,
    so every target is the value immediately after its window.

    Args:
        X: array-like whose first axis is the step axis.
        y: array-like of targets indexed along the same first axis as ``X``.
        seq_len: number of consecutive steps per window; must be >= 1.

    Returns:
        Tuple ``(xs, ys)`` of numpy arrays. ``xs`` has shape
        ``(n_windows, seq_len, *X.shape[1:])`` and ``ys`` has shape
        ``(n_windows, *y.shape[1:])`` where ``n_windows = len(X) - seq_len``.
        When ``X`` has no more than ``seq_len`` steps, both arrays are empty
        but keep those trailing dimensions.

    Raises:
        ValueError: if ``seq_len`` is smaller than 1.
        IndexError: if ``y`` is too short to supply a target for every window.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be a positive integer, got {seq_len!r}")

    X = np.asarray(X)
    y = np.asarray(y)

    n_windows = len(X) - seq_len
    if n_windows <= 0:
        # Not enough steps for a single window: return empty arrays that keep
        # the expected rank so downstream shape lookups still work.
        empty_xs = np.empty((0, seq_len) + X.shape[1:], dtype=X.dtype)
        empty_ys = np.empty((0,) + y.shape[1:], dtype=y.dtype)
        return empty_xs, empty_ys

    xs = np.stack([X[start:start + seq_len] for start in range(n_windows)])
    # Element-wise indexing (rather than a slice) so a too-short ``y`` raises
    # IndexError instead of silently yielding fewer targets than windows.
    ys = np.array([y[start + seq_len] for start in range(n_windows)])
    return xs, ys

# Each sample is a window of 30 consecutive scaled rows; its label is the
# scaled target of the row right after the window.
seq_len = 30
X_seq, y_seq = create_sequences(X=X_scaled, y=y_scaled, seq_len=seq_len)


In [9]:
# Positional split: the first 70% of the windows are used for training and
# the remainder is held out for testing.
split_idx = int(0.7 * len(X_seq))

# Convert each slice to a float32 tensor.
X_train, y_train, X_test, y_test = (
    torch.tensor(part, dtype=torch.float32)
    for part in (
        X_seq[:split_idx],
        y_seq[:split_idx],
        X_seq[split_idx:],
        y_seq[split_idx:],
    )
)

# Mini-batches of 32 training windows, reshuffled every epoch.
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)


In [11]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the final time step.

    Args:
        input_size: number of features per time step.
        hidden_size: width of each LSTM layer's hidden state.
        num_layers: number of stacked LSTM layers.
        output_size: number of values produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,  # inputs are laid out as (batch, seq, feature)
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run ``x`` through the LSTM and project its last time step."""
        sequence_output, _state = self.lstm(x)
        final_step = sequence_output[:, -1, :]  # keep only the last time step
        return self.fc(final_step)

# The per-step feature count is the size of the training tensor's third axis.
input_size = X_train.size(2)
model = LSTMModel(input_size, hidden_size=64, num_layers=2, output_size=1)


In [None]:
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Mean-squared-error objective, optimised with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 100
model.train()  # switch the module to training mode
for epoch in range(num_epochs):
    # Running sum of the per-batch losses for this epoch.
    total_loss = 0
    for X_batch, y_batch in train_loader:
        # Move the batch to the training device; the targets gain a trailing
        # axis so they match the model's (batch, 1) output.
        X_batch = X_batch.to(device)
        y_batch = y_batch.unsqueeze(1).to(device)

        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean batch loss on every 20th epoch only.
    if (epoch+1) % 20 != 0:
        continue
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.6f}")
