In [0]:
import torch
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader 
from torch import nn
from torch.optim import Adam
from torch.utils.data.sampler import SequentialSampler
from torch.utils.data import Sampler
from sklearn.preprocessing import scale

In [0]:
def get_default_device():
    if torch.cuda.is_available():
        return torch.device("cuda")
    else:
        return torch.device("cpu")

In [88]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [0]:
device = get_default_device()

In [0]:
def to_device(data,device):
    if isinstance(data,(list,tuple)):
        return [to_device(x,device) for x in data]
    return data.to(device,non_blocking = True)
    

In [0]:
class StockDataset(Dataset):
    def __init__(self,dir_path,start_year = 2002,end_year = 2017,window_size = 12):
        self.dir_path = dir_path
        self.start_year = start_year
        self.end_year = end_year
        self.window_size = window_size
        self.data = self._load_data(self.dir_path,self.start_year,self.end_year)
#         self.preprocessed_data = self._preprocess(self.data)
        self.x,self.y = self._create_features_and_labels(self.data,self.window_size)
        
        
        
        
        
        
    def _load_data(self,dir_path,start_year,end_year):
        col_name = ["date","time","open","high","low","close","volume"]
        df_full = pd.DataFrame(columns = col_name)
        for root, dirs, files in os.walk(dir_path):
            for file in files:
                year = int(file.split(".")[0].split("_")[-1])
                if (file.endswith(".csv") and year >= start_year 
                and year <= end_year):
                    df = pd.read_csv(os.path.join(root,file),names = col_name)
                    df_full = pd.concat([df_full,df])
                    print("Stock data of year {0} loaded".format(str(year)))
        df_full.reset_index(inplace = True)
        return df_full
      
      
      
    
    
    def _create_features_and_labels(self,df,window_size):
       
        n = df.shape[0]
        x_array = np.empty(shape = (n - window_size,window_size))
        y_array = np.empty(shape = (n - window_size,1))
        close_index = df.close
        for i in tqdm(range(n-window_size),desc =  "Creating Features and Labels..."):
            x_array[i] = close_index.iloc[i:i+window_size].values
            y_array[i] = close_index.iloc[i + window_size]
        x_array = scale(x_array)
        y_array = scale(y_array)
        return x_array,y_array
    
    def __getitem__(self,idx):
        return self.x[idx],self.y[idx]
    
    def __len__(self):
        return self.x.shape[0]

In [92]:
PATH = "/content/drive/My Drive/stock_data/EURGBP"
data = StockDataset(dir_path=PATH,start_year=2017)

Creating Features and Labels...:   0%|          | 716/370241 [00:00<00:51, 7147.40it/s]

Stock data of year 2017 loaded


Creating Features and Labels...: 100%|██████████| 370241/370241 [00:34<00:00, 10781.97it/s]


In [0]:
class TestSampler(Sampler):
    r"""Samples elements sequentially, always in the same order.

    Arguments:
        data_source (Dataset): dataset to sample from
    """

    def __init__(self, data_source):
        self.data_source = data_source

    def __iter__(self):
        return iter(range(self.data_source[0],self.data_source[-1] + 1))

    def __len__(self):
        return len(self.data_source)

In [0]:
train_split = .8
dataset_size = len(data)
indices = list(range(dataset_size))
split = int(np.floor(train_split * dataset_size))
train_indices, test_indices = indices[:split], indices[split:]
train_sampler = SequentialSampler(train_indices)
test_sampler = TestSampler(test_indices)

In [0]:
train_dl = DataLoader(data,batch_size=64,sampler=train_sampler,num_workers=8)
test_dl = DataLoader(data,batch_size=64,sampler=test_sampler,num_workers=8)

In [96]:
for xb,yb in test_dl:
  print(xb.device)
  break

cpu


In [0]:
class DeviceDataLoader():
    def __init__(self,dl,device):
        self.dl = dl
        self.device = device
    def __iter__(self):
        for batch in self.dl:
            yield to_device(batch,self.device)
    def __len__(self):
        return len(self.dl)

In [0]:
train_dl = DeviceDataLoader(train_dl,device)
test_dl = DeviceDataLoader(test_dl,device)

In [0]:
for xb,yb in test_dl:
  print(xb.device)
  break

cuda:0


In [0]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim

        # Number of hidden layers
        self.layer_dim = layer_dim

        # Building your LSTM
        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, feature_dim)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim,batch_first = True)

        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.Sigmoid()
    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()

        # Initialize cell state
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()
        x = x[...,None].float()
        # 12 time steps
        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        # If we don't, we'll backprop all the way to the start even after going through another batch
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        # Index hidden state of last time step
        # out.size() --> 12,
        # out[:, -1, :] --> 100, 100 --> just want last time step hidden states! 
        out = self.fc(out[:,-1, :]) 
        # out.size() --> 100, 10
        return self.relu(out)

In [0]:
model = LSTMModel(input_dim = 1 , hidden_dim = 8, layer_dim = 1, output_dim = 1)

In [115]:
for t in model.parameters():
  print(t.shape)

torch.Size([32, 1])
torch.Size([32, 8])
torch.Size([32])
torch.Size([32])
torch.Size([1, 8])
torch.Size([1])


In [0]:
def Train(train_dl,model,opt,loss_fn):
  losses =[]
  nums = []
  model.train()
  for xb,yb in train_dl:
    preds = model(xb)
    loss = loss_fn(preds,yb.float())
  
    loss.backward()
    opt.step()
    opt.zero_grad()
    losses.append(loss.item())
    nums.append(len(xb))
      
      
  total = np.sum(nums)
  avg_loss = np.sum(np.multiply(losses,nums)) / total
  
  return avg_loss

In [0]:
def Validation(test_dl,model,loss_fn):
  model.eval()
  with torch.no_grad():
    losses = []
    nums = []
    for xb,yb in test_dl:
      preds = model(xb)
      loss = loss_fn(preds,yb.float())
      losses.append(loss.item())
      nums.append(len(xb))
    total = np.sum(nums)
    avg_loss = np.sum(np.multiply(losses,nums)) / total
    return avg_loss

In [0]:
def fit(epoch,train_dl,test_dl,model,opt,loss_fn):
  
  
  total_loss_train = []
  total_loss_test = []
  opt
  
  
  for i in range(epoch):
    
    #training Phase
    train_loss = Train(train_dl,model,opt,loss_fn)
    total_loss_train.append(train_loss)
    
    #test phase
    val_loss = Validation(test_dl,model,loss_fn)
    total_loss_test.append(val_loss)
    print("Epoch [{}/{}], train_loss: {:.4f}, test_loss: {:.4f}".format(i+1,epoch,
                                                                       train_loss,
                                                                       val_loss))
  return total_loss_train,total_loss_test
    

In [119]:
optimizer = Adam(model.parameters(),lr = 0.001)
loss_fn = nn.MSELoss()
loss_train,loss_test = fit(10,train_dl,test_dl,model,optimizer,loss_fn)

Epoch [1/10], train_loss: 0.7842, test_loss: 0.0090
Epoch [2/10], train_loss: 0.7142, test_loss: 0.0121
Epoch [3/10], train_loss: 0.7149, test_loss: 0.0199
Epoch [4/10], train_loss: 0.7140, test_loss: 0.0047
Epoch [5/10], train_loss: 0.7136, test_loss: 0.0023
Epoch [6/10], train_loss: 0.7134, test_loss: 0.0020
Epoch [7/10], train_loss: 0.7134, test_loss: 0.0020
Epoch [8/10], train_loss: 0.7133, test_loss: 0.0019
Epoch [9/10], train_loss: 0.7133, test_loss: 0.0017
Epoch [10/10], train_loss: 0.7133, test_loss: 0.0016


In [110]:
loss_test

[0.3107427896462443,
 0.3107427896462443,
 0.3107427896462443,
 0.3107427896462443,
 0.3107427896462443,
 0.3107427896462443,
 0.3107427896462443,
 0.3107427896462443,
 0.3107427896462443,
 0.3107427896462443]

In [58]:
(preds.numpy())

array([[0.8411585 ],
       [0.84197557],
       [0.84277093],
       [0.84246993],
       [0.842355  ],
       [0.8423331 ],
       [0.8423337 ],
       [0.8429625 ],
       [0.8420489 ],
       [0.8416215 ],
       [0.8416122 ],
       [0.84166217],
       [0.84106445],
       [0.840611  ],
       [0.83997   ],
       [0.83930755],
       [0.8369018 ],
       [0.83877707],
       [0.8400159 ],
       [0.84026456],
       [0.84069467],
       [0.84056973],
       [0.84011555],
       [0.8407862 ],
       [0.84103096],
       [0.84215057],
       [0.84164643],
       [0.842602  ],
       [0.84285915],
       [0.8408108 ],
       [0.83959174],
       [0.8418094 ],
       [0.8432685 ],
       [0.83897495],
       [0.8390993 ],
       [0.8381301 ],
       [0.83958924],
       [0.8412931 ],
       [0.8421016 ],
       [0.8405628 ],
       [0.8407264 ],
       [0.8406888 ],
       [0.842571  ],
       [0.840335  ],
       [0.83887434],
       [0.8377795 ],
       [0.8363962 ],
       [0.837

In [125]:
i = 0
for xb,yb in test_dl:
  if i > 0:
    with torch.no_grad():
      preds = model(yb)
      print(np.abs(preds - yb))
      break
  i+=1

tensor([[0.0164],
        [0.0167],
        [0.0164],
        [0.0161],
        [0.0171],
        [0.0170],
        [0.0155],
        [0.0155],
        [0.0161],
        [0.0163],
        [0.0163],
        [0.0158],
        [0.0158],
        [0.0146],
        [0.0146],
        [0.0146],
        [0.0149],
        [0.0151],
        [0.0148],
        [0.0152],
        [0.0160],
        [0.0139],
        [0.0141],
        [0.0144],
        [0.0144],
        [0.0136],
        [0.0139],
        [0.0139],
        [0.0158],
        [0.0158],
        [0.0152],
        [0.0154],
        [0.0138],
        [0.0151],
        [0.0146],
        [0.0145],
        [0.0149],
        [0.0145],
        [0.0168],
        [0.0163],
        [0.0160],
        [0.0160],
        [0.0158],
        [0.0160],
        [0.0154],
        [0.0165],
        [0.0160],
        [0.0146],
        [0.0155],
        [0.0155],
        [0.0154],
        [0.0160],
        [0.0164],
        [0.0161],
        [0.0163],
        [0

In [129]:
xb,yb = next(iter(test_dl))
with torch.no_grad():
  print(model(xb))
  print(yb)

tensor([[0.7838],
        [0.7827],
        [0.7817],
        [0.7779],
        [0.7753],
        [0.7758],
        [0.7756],
        [0.7750],
        [0.7743],
        [0.7733],
        [0.7754],
        [0.7770],
        [0.7779],
        [0.7791],
        [0.7782],
        [0.7769],
        [0.7769],
        [0.7771],
        [0.7744],
        [0.7731],
        [0.7722],
        [0.7724],
        [0.7733],
        [0.7750],
        [0.7766],
        [0.7782],
        [0.7781],
        [0.7771],
        [0.7759],
        [0.7741],
        [0.7713],
        [0.7695],
        [0.7702],
        [0.7694],
        [0.7693],
        [0.7690],
        [0.7679],
        [0.7683],
        [0.7686],
        [0.7691],
        [0.7705],
        [0.7700],
        [0.7693],
        [0.7696],
        [0.7709],
        [0.7707],
        [0.7736],
        [0.7749],
        [0.7760],
        [0.7766],
        [0.7767],
        [0.7744],
        [0.7732],
        [0.7711],
        [0.7699],
        [0