In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, TensorDataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
def add_up_down_columns(df):
    # Create empty 'up' and 'down' columns
    df['up'] = 0
    df['down'] = 0
    
    # Loop over the rows (skipping the first row)
    for i in range(1, len(df)):
        if df.loc[i, '4. close'] > df.loc[i-1, '4. close']:
            df.loc[i, 'up'] = 1
        else:
            df.loc[i, 'down'] = 1
    
    return df

df = pd.read_csv('market_data/TimeSeries/AAPL.csv')

df = add_up_down_columns(df)

df.head()

Unnamed: 0,date,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient,up,down
0,2023-02-24,147.11,147.19,145.7202,146.71,146.71,55469606.0,0.0,1.0,0,0
1,2023-02-23,150.09,150.34,147.24,149.4,149.4,48394249.0,0.0,1.0,1,0
2,2023-02-22,148.87,149.95,147.16,148.91,148.91,51011305.0,0.0,1.0,0,1
3,2023-02-21,150.2,151.3,148.405,148.48,148.48,58867230.0,0.0,1.0,0,1
4,2023-02-17,152.35,153.0,150.85,152.55,152.55,59144118.0,0.0,1.0,1,0


In [3]:
def df_to_tensor(df):
    #right now this is just getting columns 1-4 (open, low, high, close)
    inputs = torch.from_numpy(df.iloc[:, 1:5].values.astype('float32'))
    outputs = torch.from_numpy(df.iloc[:, 9:11].values.astype('float32'))
    return inputs, outputs

inputs, outputs = df_to_tensor(df)
print(inputs.shape)
print(outputs.shape)

torch.Size([100, 4])
torch.Size([100, 2])


In [4]:
dataset = TensorDataset(inputs, outputs)

val_percent = 0.2
num_rows = len(df.index)
val_size = int(num_rows * val_percent)
train_size = num_rows - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

In [5]:
batch_size = 32
train_loader = DataLoader(train_ds, batch_size, shuffle = True, num_workers = 0)
val_loader = DataLoader(val_ds, batch_size, num_workers = 0)

In [6]:
class baselinePredictor(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 100)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(100, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

input_size = 4
output_size = 2
model = baselinePredictor(input_size, output_size)

In [7]:
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001)
num_epochs = 100

In [8]:
for epoch in range(num_epochs):
    for data in train_loader:
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')



Epoch 1/100, Loss: 712.7229
Epoch 2/100, Loss: 703.3522
Epoch 3/100, Loss: 697.4349
Epoch 4/100, Loss: 700.0151
Epoch 5/100, Loss: 694.1405
Epoch 6/100, Loss: 693.3485
Epoch 7/100, Loss: 685.8510
Epoch 8/100, Loss: 690.1052
Epoch 9/100, Loss: 679.3253
Epoch 10/100, Loss: 671.9378
Epoch 11/100, Loss: 671.2245
Epoch 12/100, Loss: 668.8616
Epoch 13/100, Loss: 656.6287
Epoch 14/100, Loss: 657.6114
Epoch 15/100, Loss: 652.0363
Epoch 16/100, Loss: 651.3932
Epoch 17/100, Loss: 652.3797
Epoch 18/100, Loss: 640.3760
Epoch 19/100, Loss: 639.7733
Epoch 20/100, Loss: 631.1075
Epoch 21/100, Loss: 633.7534
Epoch 22/100, Loss: 625.1565
Epoch 23/100, Loss: 626.2050
Epoch 24/100, Loss: 620.8696
Epoch 25/100, Loss: 617.1561
Epoch 26/100, Loss: 611.8804
Epoch 27/100, Loss: 608.2168
Epoch 28/100, Loss: 610.8741
Epoch 29/100, Loss: 607.2321
Epoch 30/100, Loss: 600.4810
Epoch 31/100, Loss: 600.0148
Epoch 32/100, Loss: 588.6575
Epoch 33/100, Loss: 585.1290
Epoch 34/100, Loss: 584.7149
Epoch 35/100, Loss: 575

In [9]:
#not very confident this is doing what I want it to do


# def get_num_correct(preds, labels):
#     return preds.argmax(dim=1).eq(labels.argmax(dim=1)).sum().item()

# total_loss = 0
# total_correct = 0

# for data in val_loader:
#     inputs, labels = data
#     outputs = model(inputs)
#     loss = criterion(outputs, labels)
#     total_loss += loss.item()
#     total_correct += get_num_correct(outputs, labels)

# print(f'Loss: {total_loss/len(val_loader):.4f}, Accuracy: {total_correct/len(val_ds):.4f}')

In [10]:
def single_prediction(model, inputs):
    with torch.no_grad():
        outputs = model(inputs)
        return torch.clamp(outputs, 0, 1)

test_input = torch.tensor([[93.53, 94.14, 92.31, 93.5]])
single_prediction(model, test_input)
# returns [[0, 1]]
# our nn thinks it will go down tomorrow


tensor([[0., 0.]])