In [1]:
import numpy as np
import torch.optim as optim
import os,sys, re, time
import PIL
from PIL import Image as PILImage
import skimage
import torch
import torch.nn as nn
import torch.utils.data as data
import pandas as pd
import skimage
import torchvision
import sklearn
import matplotlib
import matplotlib.pyplot as plt
#matplotlib inline
from IPython.display import display, HTML
start = time.time()

In [2]:
df = pd.read_csv('time_series_data/train_data.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Open,High,Low,Close,Volume,Name
0,0,2006-01-03,10.34,10.68,10.32,10.68,201853036,AAPL
1,1,2006-01-04,10.73,10.85,10.64,10.71,155225609,AAPL
2,2,2006-01-05,10.69,10.7,10.54,10.63,112396081,AAPL
3,3,2006-01-06,10.75,10.96,10.65,10.9,176139334,AAPL
4,4,2006-01-09,10.96,11.03,10.82,10.86,168861224,AAPL


In [3]:
df['Name'].unique()

array(['AAPL'], dtype=object)

In [13]:
class StocksDataLoader(data.Dataset):
    def __init__(self,stocks = False,data_dir=None, stage=None, length=None):
        self.data_dir = data_dir
        self.stage = stage
        self.length = length
        self.stocks = stocks
        if not self.stocks:
            if self.stage == 'train': 
                self.data = self.load_data(os.path.join(self.data_dir,'train_data.csv'))
            elif self.stage == 'test':
                self.data = self.load_data(os.path.join(self.data_dir, 'test_data.csv'))
            elif self.stage == 'val':
                self.data = self.load_data(os.path.join(self.data_dir, 'val_data.csv'))
        else:
            tr_ratio, val_ratio, test_ratio = 0.7,0.15,0.15
            self.root = os.path.join(self.data_dir,self.stocks)
            self.data = self.split_data(self.root,tr_ratio, val_ratio, test_ratio)
        
            
            
    def load_data(self,path):
        data = pd.read_csv(path)
        data = torch.tensor(data['Close'])
        return data
    
    def split_data(self,path,tr_ratio,val_ratio,test_ratio):
        
        data = pd.read_csv(path)
        total_rows = len(data)
        train_end_idx = int(tr_ratio*total_rows)
        val_end_idx = int(train_end_idx + val_ratio*total_rows)
        test_end_idx = int(val_end_idx + test_ratio*total_rows)
        
        train_data = data.iloc[:train_end_idx]
        val_data = data.iloc[train_end_idx:val_end_idx]
        test_data = data.iloc[val_end_idx:]
        if self.stage == 'train':
            data = torch.tensor(train_data['Close'].values)
        elif self.stage == 'val':
            data = torch.tensor(val_data['Close'].values)
        elif self.stage == 'test':
            data = torch.tensor(test_data['Close'].values)
        return data
    
    def __getitem__(self,idx):
        input_data = self.data[idx:idx+self.length].float()
        output_data = self.data[idx+self.length].float()
        return idx, input_data, output_data
    
    def __len__(self,):
        return len(self.data) - self.length
 

In [14]:
class Model(nn.Module):
    def __init__(self,num_layers=None, num_target = 1):
        super(Model, self).__init__()
        self.num_layers = num_layers 
        self.num_target = num_target
        self.linear1 = nn.Linear(num_layers,512)
        self.sigmoid = nn.Sigmoid()
        self.batchNorm = nn.BatchNorm1d(512)
        self.drop = nn.Dropout()
        self.linear2 = nn.Linear(512,1)
        self.init_weights()
        
    def forward(self,x):
        x = self.linear1(x)
        x = self.sigmoid(x)
        x = self.batchNorm(x)
        x = self.drop(x)
        x = self.linear2(x)
        return x
    
    def init_weights(self,):
        for layer in self.modules():
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight.data)
                layer.bias.data.fill_(0.1)
                
                
            elif isinstance(layer, nn.BatchNorm1d):
                layer.weight.data.fill_(1)
                layer.bias.data.fill_(0.01)
                
            
        
        
        

In [15]:
seq_length = 20
train_batch_size = 512
val_batch_size = 512
test_batch_size = 512

train_data = StocksDataLoader('MSFT_2006-01-01_to_2018-01-01.csv','time_series_data','train',seq_length)
train_dataloader = data.DataLoader(train_data, batch_size = train_batch_size, shuffle=True)

val_data = StocksDataLoader('MSFT_2006-01-01_to_2018-01-01.csv','time_series_data','val',seq_length)
val_dataloader = data.DataLoader(val_data, batch_size = val_batch_size, shuffle=False)

test_data = StocksDataLoader('MSFT_2006-01-01_to_2018-01-01.csv','time_series_data','test',seq_length)
test_dataloader = data.DataLoader(test_data, batch_size = test_batch_size, shuffle=False)



In [16]:
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')
print('Using {}'.format(device))

Using cuda


In [17]:
model = Model(seq_length)
model.to(device)
model.train()
total_params = 0
print(model)
for n,params in model.state_dict().items():
    total_params += params.numel()
    
print('Total number of trainable parameters : {}'.format(total_params))


Model(
  (linear1): Linear(in_features=20, out_features=512, bias=True)
  (sigmoid): Sigmoid()
  (batchNorm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drop): Dropout(p=0.5, inplace=False)
  (linear2): Linear(in_features=512, out_features=1, bias=True)
)
Total number of trainable parameters : 13314


In [18]:
optimizer = optim.SGD(model.parameters(),lr = 1e-4, weight_decay=1e-3)
loss = nn.MSELoss()
epochs =1000
curr_loss = 0


trai_loss = []
val_loss = []

for e in range(epochs):
    curr_val_loss = 0
    curr_train_loss = 0
    
    for batch in train_dataloader:
        optimizer.zero_grad()
        idx, X,y = batch
        X = X.to(device)
        y = y.to(device)
        output = model(X)
        
        loss_tr = loss(output.view(-1),y)
        loss_tr.backward()
        optimizer.step()
        curr_train_loss += loss_tr.div(train_batch_size)
    trai_loss.append(curr_train_loss.div(len(train_dataloader)).item())
    optimizer.zero_grad()
                       
    
    
    for batch in val_dataloader:
        idx,X,y=batch
        X = X.to(device)
        y = y.to(device)
        output = model(X)
        loss_validation = loss(output.view(-1),y)
        curr_val_loss += loss_validation.div(val_batch_size)
    val_loss.append(curr_val_loss.div(len(val_dataloader)).item())

    print("Epoch:{0:d}, Train loss: {1:0.3f}, Validation loss: {2:0.3f}".format(e, curr_train_loss.div(len(train_dataloader)).item(), curr_val_loss.div(len(val_dataloader)).item()))

Epoch:0, Train loss: 1.570, Validation loss: 4.288
Epoch:1, Train loss: 1.544, Validation loss: 4.289
Epoch:2, Train loss: 1.525, Validation loss: 4.267
Epoch:3, Train loss: 1.528, Validation loss: 4.245
Epoch:4, Train loss: 1.478, Validation loss: 4.217
Epoch:5, Train loss: 1.500, Validation loss: 4.201
Epoch:6, Train loss: 1.484, Validation loss: 4.187
Epoch:7, Train loss: 1.487, Validation loss: 4.187
Epoch:8, Train loss: 1.460, Validation loss: 4.158
Epoch:9, Train loss: 1.472, Validation loss: 4.133
Epoch:10, Train loss: 1.467, Validation loss: 4.140
Epoch:11, Train loss: 1.441, Validation loss: 4.113
Epoch:12, Train loss: 1.459, Validation loss: 4.090
Epoch:13, Train loss: 1.442, Validation loss: 4.077
Epoch:14, Train loss: 1.433, Validation loss: 4.056
Epoch:15, Train loss: 1.415, Validation loss: 4.045
Epoch:16, Train loss: 1.384, Validation loss: 4.015
Epoch:17, Train loss: 1.384, Validation loss: 4.004
Epoch:18, Train loss: 1.369, Validation loss: 3.976
Epoch:19, Train loss: 

Epoch:160, Train loss: 0.013, Validation loss: 0.731
Epoch:161, Train loss: 0.013, Validation loss: 0.737
Epoch:162, Train loss: 0.012, Validation loss: 0.721
Epoch:163, Train loss: 0.014, Validation loss: 0.709
Epoch:164, Train loss: 0.012, Validation loss: 0.725
Epoch:165, Train loss: 0.015, Validation loss: 0.712
Epoch:166, Train loss: 0.014, Validation loss: 0.717
Epoch:167, Train loss: 0.013, Validation loss: 0.711
Epoch:168, Train loss: 0.013, Validation loss: 0.721
Epoch:169, Train loss: 0.014, Validation loss: 0.724
Epoch:170, Train loss: 0.014, Validation loss: 0.743
Epoch:171, Train loss: 0.013, Validation loss: 0.716
Epoch:172, Train loss: 0.011, Validation loss: 0.721
Epoch:173, Train loss: 0.013, Validation loss: 0.724
Epoch:174, Train loss: 0.013, Validation loss: 0.733
Epoch:175, Train loss: 0.013, Validation loss: 0.729
Epoch:176, Train loss: 0.011, Validation loss: 0.715
Epoch:177, Train loss: 0.013, Validation loss: 0.717
Epoch:178, Train loss: 0.013, Validation loss:

Epoch:317, Train loss: 0.010, Validation loss: 0.719
Epoch:318, Train loss: 0.009, Validation loss: 0.711
Epoch:319, Train loss: 0.009, Validation loss: 0.718
Epoch:320, Train loss: 0.011, Validation loss: 0.723
Epoch:321, Train loss: 0.009, Validation loss: 0.715
Epoch:322, Train loss: 0.009, Validation loss: 0.708
Epoch:323, Train loss: 0.010, Validation loss: 0.698
Epoch:324, Train loss: 0.010, Validation loss: 0.713
Epoch:325, Train loss: 0.009, Validation loss: 0.710
Epoch:326, Train loss: 0.009, Validation loss: 0.713
Epoch:327, Train loss: 0.010, Validation loss: 0.717
Epoch:328, Train loss: 0.009, Validation loss: 0.710
Epoch:329, Train loss: 0.009, Validation loss: 0.706
Epoch:330, Train loss: 0.009, Validation loss: 0.710
Epoch:331, Train loss: 0.009, Validation loss: 0.713
Epoch:332, Train loss: 0.009, Validation loss: 0.708
Epoch:333, Train loss: 0.009, Validation loss: 0.701
Epoch:334, Train loss: 0.009, Validation loss: 0.712
Epoch:335, Train loss: 0.009, Validation loss:

Epoch:473, Train loss: 0.008, Validation loss: 0.723
Epoch:474, Train loss: 0.008, Validation loss: 0.693
Epoch:475, Train loss: 0.008, Validation loss: 0.704
Epoch:476, Train loss: 0.008, Validation loss: 0.695
Epoch:477, Train loss: 0.008, Validation loss: 0.706
Epoch:478, Train loss: 0.009, Validation loss: 0.703
Epoch:479, Train loss: 0.008, Validation loss: 0.708
Epoch:480, Train loss: 0.008, Validation loss: 0.700
Epoch:481, Train loss: 0.009, Validation loss: 0.704
Epoch:482, Train loss: 0.008, Validation loss: 0.698
Epoch:483, Train loss: 0.008, Validation loss: 0.713
Epoch:484, Train loss: 0.008, Validation loss: 0.700
Epoch:485, Train loss: 0.008, Validation loss: 0.684
Epoch:486, Train loss: 0.009, Validation loss: 0.704
Epoch:487, Train loss: 0.007, Validation loss: 0.704
Epoch:488, Train loss: 0.008, Validation loss: 0.705
Epoch:489, Train loss: 0.008, Validation loss: 0.711
Epoch:490, Train loss: 0.008, Validation loss: 0.704
Epoch:491, Train loss: 0.008, Validation loss:

Epoch:631, Train loss: 0.007, Validation loss: 0.702
Epoch:632, Train loss: 0.007, Validation loss: 0.696
Epoch:633, Train loss: 0.007, Validation loss: 0.711
Epoch:634, Train loss: 0.008, Validation loss: 0.697
Epoch:635, Train loss: 0.007, Validation loss: 0.711
Epoch:636, Train loss: 0.007, Validation loss: 0.700
Epoch:637, Train loss: 0.008, Validation loss: 0.692
Epoch:638, Train loss: 0.008, Validation loss: 0.704
Epoch:639, Train loss: 0.007, Validation loss: 0.704
Epoch:640, Train loss: 0.008, Validation loss: 0.719
Epoch:641, Train loss: 0.007, Validation loss: 0.709
Epoch:642, Train loss: 0.007, Validation loss: 0.705
Epoch:643, Train loss: 0.007, Validation loss: 0.695
Epoch:644, Train loss: 0.008, Validation loss: 0.703
Epoch:645, Train loss: 0.008, Validation loss: 0.703
Epoch:646, Train loss: 0.007, Validation loss: 0.708
Epoch:647, Train loss: 0.008, Validation loss: 0.705
Epoch:648, Train loss: 0.007, Validation loss: 0.710
Epoch:649, Train loss: 0.008, Validation loss:

Epoch:790, Train loss: 0.008, Validation loss: 0.715
Epoch:791, Train loss: 0.007, Validation loss: 0.707
Epoch:792, Train loss: 0.007, Validation loss: 0.711
Epoch:793, Train loss: 0.007, Validation loss: 0.700
Epoch:794, Train loss: 0.008, Validation loss: 0.712
Epoch:795, Train loss: 0.007, Validation loss: 0.705
Epoch:796, Train loss: 0.007, Validation loss: 0.705
Epoch:797, Train loss: 0.007, Validation loss: 0.699
Epoch:798, Train loss: 0.007, Validation loss: 0.705
Epoch:799, Train loss: 0.007, Validation loss: 0.708
Epoch:800, Train loss: 0.007, Validation loss: 0.702
Epoch:801, Train loss: 0.007, Validation loss: 0.697
Epoch:802, Train loss: 0.007, Validation loss: 0.709
Epoch:803, Train loss: 0.007, Validation loss: 0.703
Epoch:804, Train loss: 0.007, Validation loss: 0.704
Epoch:805, Train loss: 0.007, Validation loss: 0.708
Epoch:806, Train loss: 0.007, Validation loss: 0.702
Epoch:807, Train loss: 0.007, Validation loss: 0.708
Epoch:808, Train loss: 0.008, Validation loss:

Epoch:947, Train loss: 0.007, Validation loss: 0.710
Epoch:948, Train loss: 0.007, Validation loss: 0.699
Epoch:949, Train loss: 0.007, Validation loss: 0.709
Epoch:950, Train loss: 0.007, Validation loss: 0.709
Epoch:951, Train loss: 0.007, Validation loss: 0.699
Epoch:952, Train loss: 0.007, Validation loss: 0.698
Epoch:953, Train loss: 0.007, Validation loss: 0.709
Epoch:954, Train loss: 0.008, Validation loss: 0.703
Epoch:955, Train loss: 0.007, Validation loss: 0.711
Epoch:956, Train loss: 0.007, Validation loss: 0.705
Epoch:957, Train loss: 0.007, Validation loss: 0.712
Epoch:958, Train loss: 0.007, Validation loss: 0.697
Epoch:959, Train loss: 0.007, Validation loss: 0.704
Epoch:960, Train loss: 0.007, Validation loss: 0.712
Epoch:961, Train loss: 0.006, Validation loss: 0.697
Epoch:962, Train loss: 0.008, Validation loss: 0.697
Epoch:963, Train loss: 0.007, Validation loss: 0.704
Epoch:964, Train loss: 0.007, Validation loss: 0.707
Epoch:965, Train loss: 0.008, Validation loss:

In [23]:
test_loss = 0
total = 0
correct = 0
for batch in test_dataloader:
    idx, X,y = batch
    X,y = X.to(device), y.to(device)
    
    output = model(X)

    curr_loss_test = loss(output.view(-1),y)
    test_loss += curr_loss_test.div(test_batch_size)
    
    total += test_batch_size
print('Test Loss : {}'.format(test_loss.div(len(test_dataloader))))

Test Loss : 2.7382290363311768
