In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, TensorDataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
# in the future this should be done in setup but I just did it here for now

# this is actually backward of what it should be since the df starts at present and goes back in time
def add_up_column(df):
    # Create empty 'up' and 'down' columns
    df['up'] = 0
    
    # Loop over the rows (skipping the first row)
    for i in range(1, len(df)):
        if df.loc[i, '4. close'] > df.loc[i-1, '4. close']:
            df.loc[i, 'up'] = 1
    return df

df = pd.read_csv('market_data/TimeSeries/AAPL.csv')

df = add_up_column(df)


In [3]:
# neural networks require tensors, so we need to convert our dataframes to tensors

def df_to_tensor(df):
    #right now this is just getting columns 1-4 (open, low, high, close)
    inputs = torch.from_numpy(df.iloc[:, 1:5].values.astype('float32'))
    outputs = torch.from_numpy(df.iloc[:, 9:].values.astype('float32'))
    return inputs, outputs


inputs, outputs = df_to_tensor(df)
print(inputs.shape)
print(outputs.shape)

torch.Size([100, 4])
torch.Size([100, 1])


In [4]:
# making a training and validation dataset
# just random splitting for now 

dataset = TensorDataset(inputs, outputs)

val_percent = 0.2
num_rows = len(df.index)
val_size = int(num_rows * val_percent)
train_size = num_rows - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

In [5]:
# pytorch likes to use data loaders to load data in batches

batch_size = 1
train_loader = DataLoader(train_ds, batch_size, shuffle = True, num_workers = 0)
val_loader = DataLoader(val_ds, batch_size, num_workers = 0)

In [6]:
class baselinePredictor(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        # input size -> 100 -> output size, is baseline nn architecture
        self.sigmoid = nn.Sigmoid()
        self.fc1 = nn.Linear(input_size, 100)
        self.sigmoid2 = nn.Sigmoid()
        self.fc2 = nn.Linear(100, output_size)
        self.sigmoid3 = nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(x)
        x = self.fc1(x)
        x = self.sigmoid2(x)
        x = self.fc2(x)
        x = self.sigmoid3(x)
        return x

# input size is 4 because we are using open, low, high, close
# output size is 2 because we are predicting up or down
input_size = 4
output_size = 1
model = baselinePredictor(input_size, output_size)

In [7]:
# hyperparameters for training
# will need to change these a bunch to find out what works best
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.0001)
num_epochs = 100

In [8]:
for epoch in range(num_epochs):
    for data in train_loader:
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    if(epoch % 20 == 0 or epoch == num_epochs - 1):
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')



Epoch 1/100, Loss: 0.8341
Epoch 21/100, Loss: 0.6660
Epoch 41/100, Loss: 0.7009
Epoch 61/100, Loss: 0.6752
Epoch 81/100, Loss: 0.6714
Epoch 100/100, Loss: 0.7185


In [9]:
# not working yet

# def get_accuracy(model, loader):
#     num_correct = 0
#     num_samples = 0
#     model.eval()
#     with torch.no_grad():
#         for x, y in loader:
#             outputs = model(x)
#             _, predictions = outputs.max(1)
#             num_correct += (predictions == y).sum()
#             num_samples += predictions.size(0)
#         print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}')

# get_accuracy(model, val_loader)



In [13]:
# test out some sample predictions with the trained model

def single_prediction(model, inputs):
    with torch.no_grad():
        outputs = model(inputs)
        # uncomment to see the raw outputs
        # return outputs
        return torch.clamp(outputs, 0, 1)


In [14]:
testdf = pd.read_csv('market_data/TimeSeries/AMZN.csv')

testInputs, testOutputs = df_to_tensor(testdf)

for inputs in testInputs:
    print(inputs)
    print(single_prediction(model, inputs)) 


tensor([93.5300, 94.1400, 92.3186, 93.5000])
tensor([0.4878])
tensor([96.1200, 96.4300, 93.6700, 95.8200])
tensor([0.4878])
tensor([95.1000, 97.0100, 94.8000, 95.7900])
tensor([0.4878])
tensor([95.3350, 95.6050, 94.2700, 94.5800])
tensor([0.4878])
tensor([97.8000, 97.9400, 95.6500, 97.2000])
tensor([0.4878])
tensor([ 99.2100, 100.6300,  98.1000,  98.1500])
tensor([0.4878])
tensor([ 99.0900, 101.1700,  98.4500, 101.1600])
tensor([0.4878])
tensor([ 98.4100, 100.9207,  97.5200,  99.7000])
tensor([0.4878])
tensor([97.8500, 99.6800, 96.9100, 99.5400])
tensor([0.4878])
tensor([97.5600, 98.8163, 96.2300, 97.6100])
tensor([0.4878])
tensor([101.3200, 101.7800,  97.5700,  98.2400])
tensor([0.4878])
tensor([102.0400, 102.6699,  98.7750, 100.0500])
tensor([0.4878])
tensor([101.1700, 102.4100,  98.0800, 102.1100])
tensor([0.4878])
tensor([102.9250, 103.9484, 100.6500, 102.1800])
tensor([0.4878])
tensor([105.2600, 108.7800, 102.5200, 103.3900])
tensor([0.4878])
tensor([110.2450, 114.0000, 108.8800, 

In [19]:
single_prediction(model, torch.tensor([ 80, 1,  1, 1])) 
#theoretically this should be more different gotta fix some stuff

tensor([0.4835])