In [1]:
import pandas as pd

# Load AAPL data (change path if needed)
df = pd.read_csv("AAPL_daily.csv")

# Sort by date
df = df.sort_values(by="Date").reset_index(drop=True)

# Add 1-day trend label: 1 if next close is higher, else 0
df["Trend"] = (df["Close"].shift(-1) > df["Close"]).astype(int)

# Drop the last row (no label)
df = df.iloc[:-1].reset_index(drop=True)

# Optional: add a dummy ticker_id = 0 for compatibility with model
df["ticker_id"] = 0

print("✅ Step 1 & 2 complete. Sample:")
print(df.head())


✅ Step 1 & 2 complete. Sample:
         Date                Open                High                Low  \
0  2017-03-29  33.373452477074316   33.56159923128349  33.25963937410427   
1  2017-03-30   33.49190882568726   33.56391388701477  33.33163766634339   
2  2017-03-31    33.3827447550176  33.510497408339084  33.21782704904723   
3  2017-04-03   33.38041493515435   33.47564548544359  33.22711178943953   
4  2017-04-04   33.27357180868976  33.654504705427456  33.25498928393985   

                Close     Volume ticker company  Trend  ticker_id  
0   33.47565460205078  116760000   AAPL   Apple      0          0  
1  33.431514739990234   84829200   AAPL   Apple      0          0  
2   33.36880874633789   78646800   AAPL   Apple      1          0  
3  33.378089904785156   79942800   AAPL   Apple      1          0  
4   33.62663269042969   79565600   AAPL   Apple      0          0  


In [2]:
# Split last 5 rows as test, rest as train/val
df = df.sort_values(by="Date").reset_index(drop=True)

if len(df) <= 95:
    print("⚠️ Not enough data (need at least 95 rows).")
else:
    df_train_val = df.iloc[:-5].reset_index(drop=True)
    df_test = df.iloc[-5:].reset_index(drop=True)

    print("✅ Step 4 complete.")
    print("Train+Val shape:", df_train_val.shape)
    print("Test shape:", df_test.shape)


✅ Step 4 complete.
Train+Val shape: (2005, 10)
Test shape: (5, 10)


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split

# Parameters
INPUT_WINDOW = 90
FEATURE_COLS = ['Open', 'High', 'Low', 'Close', 'Volume']

# Dataset for AAPL
class AAPLDataset(Dataset):
    def __init__(self, df, input_window):
        self.samples = []
        features = df[FEATURE_COLS].astype(float).values
        labels = df["Trend"].values

        for i in range(len(df) - input_window):
            x = features[i:i+input_window]
            y = labels[i+input_window]  # next-day trend
            self.samples.append((x, y))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        x, y = self.samples[idx]
        return {
            "x": torch.tensor(x, dtype=torch.float32),
            "y": torch.tensor([y], dtype=torch.float32)
        }

# Create dataset and loaders
aapl_dataset = AAPLDataset(df_train_val, INPUT_WINDOW)
train_size = int(0.9 * len(aapl_dataset))
val_size = len(aapl_dataset) - train_size

train_dataset, val_dataset = random_split(aapl_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

print("✅ Step 5 complete.")
print(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}")


✅ Step 5 complete.
Train samples: 1723, Val samples: 192


In [4]:
import torch.nn as nn

# LSTM Model (No ticker embedding)
class AAPLLSTM(nn.Module):
    def __init__(self, input_dim=5, hidden_dim=64):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        last = out[:, -1, :]  # last timestep output
        return self.fc(last)

# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AAPLLSTM().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training Loop
best_val_loss = float("inf")
EPOCHS = 20

for epoch in range(EPOCHS):
    model.train()
    train_loss, train_correct = 0, 0

    for batch in train_loader:
        x = batch["x"].to(device)
        y = batch["y"].to(device)

        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds = torch.sigmoid(output) > 0.5
        train_correct += (preds == y.bool()).sum().item()

    model.eval()
    val_loss, val_correct = 0, 0

    with torch.no_grad():
        for batch in val_loader:
            x = batch["x"].to(device)
            y = batch["y"].to(device)
            output = model(x)
            loss = criterion(output, y)
            val_loss += loss.item()
            preds = torch.sigmoid(output) > 0.5
            val_correct += (preds == y.bool()).sum().item()

    train_loss /= len(train_loader)
    val_loss /= len(val_loader)
    train_acc = train_correct / len(train_dataset)
    val_acc = val_correct / len(val_dataset)

    print(f"Epoch {epoch+1}/{EPOCHS} | "
          f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_aapl_model.pt")
        print("✔️ Saved best AAPL model")

print("✅ AAPL Model Training Complete. Best model saved as best_aapl_model.pt")


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.