In [59]:
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn

In [60]:
# Colab-only setup: mount the user's Google Drive inside the VM at
# /content/gdrive so its contents are reachable through ordinary file paths.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [61]:
# Folder on the mounted Google Drive that holds the data files read by this
# notebook. The trailing slash is kept so the value can be used directly as a
# path prefix.
data_path = "/content/gdrive/My Drive/Project/Stocks/"

In [62]:
def get_all_files(data_path=data_path, max_files=10, min_lines=100):
    """Collect the paths of data files that are long enough to be used.

    Args:
        data_path: Directory to scan. Defaults to the notebook-level
            ``data_path`` as it was when this function was defined.
        max_files: Examine at most this many directory entries (default 10,
            the cap that used to be hard-coded). ``None`` examines them all.
        min_lines: A file is kept only if it contains strictly more than this
            many lines (default 100).

    Returns:
        list of str: Full paths of the kept files, ordered by file name.
    """
    all_files = []

    # os.listdir returns entries in arbitrary order; sorting makes the capped
    # subset identical on every run.
    for name in sorted(os.listdir(data_path))[:max_files]:
        path = os.path.join(data_path, name)
        if not os.path.isfile(path):
            continue  # sub-directories cannot be opened as data files

        # Count lines lazily instead of loading the whole file into memory;
        # the with-block closes the handle even if reading fails.
        with open(path) as f:
            n_lines = sum(1 for _ in f)

        if n_lines > min_lines:
            all_files.append(path)

    return all_files

# Build the list of usable data files once, using the function's defaults.
all_files = get_all_files()

In [63]:
# Shuffle the usable files and split them 60% / 20% / 20% into
# training / validation / test sets.
SPLIT_SEED = 0  # fixed so the split is the same after every kernel restart

n = len(all_files)
train_end, val_end = int(n*0.6), int(n*0.8)

# Sort before shuffling: the cell then yields the same split no matter what
# order the files were listed in, and re-running it is idempotent.
all_files = np.array(sorted(all_files), dtype=str)
# A local Generator keeps the seed from leaking into NumPy's global RNG state.
np.random.default_rng(SPLIT_SEED).shuffle(all_files)

training_data = all_files[:train_end]
validation_data = all_files[train_end:val_end]
test_data = all_files[val_end:]

In [71]:
def transform_data(file, K, n_lines=100):
    """Turn the tail of one comma-separated data file into sliding-window samples.

    Only the last ``n_lines`` lines of the file are read; blank lines among
    them are skipped. From every remaining line the first field and the last
    two fields are dropped (presumably the date and the volume /
    open-interest columns -- confirm against the data files) and the rest is
    parsed as floats, giving one feature row per line.

    Every run of ``K`` consecutive rows becomes one input sequence; its target
    is the last kept field of the row that immediately follows the run
    (presumably the closing price -- confirm against the data files).

    Args:
        file: Path of the text file to read.
        K: Number of consecutive rows per input sequence; must be >= 1.
        n_lines: How many trailing lines of the file to use (default 100, the
            value that used to be hard-coded); must be >= 1.

    Returns:
        tuple: ``(X, T)`` of ``torch.float`` tensors. ``X`` has shape
        ``(N, K, F)`` and ``T`` has shape ``(N,)``, where ``F`` is the number
        of kept fields per line and ``N`` is the number of rows minus ``K``
        (zero when there are not enough rows).

    Raises:
        ValueError: If ``K`` or ``n_lines`` is smaller than 1, or if a kept
            field cannot be parsed as a float.
    """
    if K < 1 or n_lines < 1:
        raise ValueError("K and n_lines must both be at least 1")

    # The with-block guarantees the handle is closed even if reading fails.
    with open(file, "r") as f:
        tail = f.readlines()[-n_lines:]

    rows = [line.split(",")[1:-2] for line in tail if line.strip()]

    n_samples = len(rows) - K
    if n_samples <= 0:
        # Too few rows for even one window: return correctly-shaped empties so
        # callers can still rely on X being three-dimensional.
        n_features = len(rows[0]) if rows else 0
        return (torch.empty((0, K, n_features), dtype=torch.float),
                torch.empty((0,), dtype=torch.float))

    # Parse every row exactly once, then slice windows out of the float array.
    values = np.array(rows, dtype=float)
    # Stacking into a single ndarray first avoids the slow (and warned-about)
    # torch.tensor(list_of_ndarrays) conversion.
    X = np.stack([values[i:i + K] for i in range(n_samples)])
    T = values[K:, -1]

    return torch.tensor(X, dtype=torch.float), torch.tensor(T, dtype=torch.float)

In [83]:
class StocksRNN(nn.Module):
    """Vanilla RNN over a sequence, followed by a linear read-out to one value."""

    def __init__(self, input_size, hidden_size):
        """Build the recurrent layer and the read-out layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the recurrent hidden state.
        """
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs are laid out as (batch, time step, feature).
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one prediction per input sequence, shape ``(batch, 1)``."""
        hidden_seq = self.rnn(x)[0]
        # Only the hidden state at the final time step feeds the read-out.
        final_step = hidden_seq[:, -1, :]
        return self.fc(final_step)

class StocksLSTM(nn.Module):
    """LSTM over a sequence, followed by a linear read-out to one value."""

    def __init__(self, input_size, hidden_size):
        """Build the LSTM layer and the read-out layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the LSTM hidden state.
        """
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs are laid out as (batch, time step, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one prediction per input sequence, shape ``(batch, 1)``."""
        hidden_seq = self.lstm(x)[0]
        # Only the hidden state at the final time step feeds the read-out.
        final_step = hidden_seq[:, -1, :]
        return self.fc(final_step)

In [88]:
def get_mean_squared_error(model, files, K, transform=None):
    """Compute the mean squared prediction error of ``model`` over ``files``.

    Each file is turned into ``(X, T)`` windows, the model is run on ``X`` and
    the squared differences to ``T`` are accumulated over all files before
    dividing by the total number of samples.

    Args:
        model: Callable mapping an ``X`` batch to one prediction per sample
            (e.g. an output of shape ``(N, 1)`` or ``(N,)``).
        files: Iterable of file identifiers handed to ``transform``.
        K: Window length forwarded to ``transform``.
        transform: Optional callable ``(file, K) -> (X, T)``. Defaults to the
            notebook's ``transform_data``; pass a different one to evaluate on
            cached or pre-loaded windows.

    Returns:
        A 0-dim tensor holding the mean squared error. It is computed under
        ``torch.no_grad()`` and therefore carries no autograd graph. NaN is
        returned when there are no samples at all.
    """
    if transform is None:
        transform = transform_data

    error, total = 0, 0
    # Evaluation only: do not build an autograd graph across every file.
    with torch.no_grad():
        for f in files:
            X, T = transform(f, K)
            if T.shape[0] == 0:
                continue  # file too short to yield any sample

            # The model emits (N, 1) while T is (N,). Without the reshape the
            # subtraction broadcasts to an (N, N) matrix and inflates the sum.
            output = model(X).reshape(T.shape)
            error += torch.sum((T - output) ** 2)
            total += T.shape[0]

    if total == 0:
        return torch.tensor(float("nan"))
    return error / total