In [1]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision
import wandb
from torch.nn import *
from torch.optim import *
from tqdm import tqdm
# Weights & Biases project that the runs in this notebook log to.
PROJECT_NAME = 'Ethereum-Price-Prediction'
# Use the first CUDA GPU when one is visible; otherwise fall back to the CPU so
# the notebook can still be executed on a machine without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [2]:
# Load the raw price history. The preview in the next cell shows the columns
# Date, Price, Open, High, Low, Vol. and Change %.
data = pd.read_csv('./data.csv')

In [3]:
# Preview the rows from position 100 onward -- the same slice the cells below
# use for the dates and prices.
data.iloc[100:]

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
100,"May 01, 2021",2944.66,2772.27,2951.74,2753.60,1.51M,6.20%
101,"Apr 30, 2021",2772.78,2757.02,2797.61,2726.03,1.52M,0.56%
102,"Apr 29, 2021",2757.36,2746.51,2797.52,2670.35,1.65M,0.39%
103,"Apr 28, 2021",2746.54,2665.83,2759.03,2562.74,1.74M,3.05%
104,"Apr 27, 2021",2665.24,2531.07,2677.95,2484.44,1.45M,5.22%
...,...,...,...,...,...,...,...
1974,"Mar 14, 2016",12.5,15.07,15.07,11.4,92.18K,-17.05%
1975,"Mar 13, 2016",15.07,12.92,15.07,12.92,1.30K,16.64%
1976,"Mar 12, 2016",12.92,11.95,13.45,11.95,0.83K,8.12%
1977,"Mar 11, 2016",11.95,11.75,11.95,11.75,0.18K,1.70%


In [4]:
# Date strings (e.g. "May 01, 2021") for rows 100 onward.
# NOTE(review): the reason for skipping the first 100 rows is not recorded -- confirm.
X = data['Date'][100:]

In [5]:
# `Price` column for the same rows as X (position 100 onward).
y = data['Price'][100:]

In [6]:
# Maps each month token (the first space-separated part of a date string) to an
# integer code; filled in by the loop two cells below.
month_labels = {}

In [7]:
# Last month code handed out. Starts at -1 so the first month encountered gets code 0.
idx = -1

In [8]:
# Give every month token an integer code, numbered in order of first appearance.
for raw_date in X:
    month = raw_date.split(' ')[0]
    if month in month_labels:
        continue
    idx += 1
    month_labels[month] = idx

In [9]:
# Holds one integer-encoded date per entry of X.
new_X = []

In [10]:
# Encode every date string ("May 01, 2021") as one integer made of
# <month code><two-digit day><year>.
# The day is zero-padded so the encoding is unambiguous: without padding,
# month code 1 / day 12 and month code 11 / day 2 produce the same digits.
# The list is rebuilt from scratch so re-running this cell does not append
# duplicates to an already filled list.
new_X = []
for date in X:
    parts = date.split(' ')
    month_code = month_labels[parts[0]]
    day = int(parts[1].replace(',',''))  # "01," -> 1
    year = int(parts[2])
    new_X.append(int(f'{month_code}{day:02d}{year}'))

In [11]:
# Convert the encoded dates to a tensor. This rebinds X, replacing the Series of
# date strings, so the date-parsing cells above cannot be re-run after this one.
X = torch.from_numpy(np.array(new_X))

In [12]:
# Holds the prices after they are parsed to float.
new_y = []

In [13]:
# Drop the thousands separators from each price string and store it as a float.
new_y.extend(float(raw_price.replace(',','')) for raw_price in y)

In [14]:
# Rebind y to a tensor of the parsed prices (replaces the Series of price strings).
y = torch.from_numpy(np.array(new_y))

In [15]:
class LSTMPredictor(Module):
    """Two stacked LSTM cells plus a linear read-out for a univariate sequence.

    At every time step the scalar input goes through ``lstm1`` and ``lstm2``,
    and ``linear`` maps the second hidden state to one output value.

    Args:
        n_hidden: Size of the hidden and cell state of both LSTM cells.

    Like any ``torch.nn.Module`` the network is created on the default device;
    move it with ``.to(...)`` after construction. ``forward`` follows wherever
    the parameters live instead of relying on a global device name.
    """

    def __init__(self,n_hidden=512):
        super().__init__()
        self.n_hidden = n_hidden
        # lstm1, lstm2, linear
        self.lstm1 = LSTMCell(1,self.n_hidden)
        self.lstm2 = LSTMCell(self.n_hidden,self.n_hidden)
        self.linear = Linear(self.n_hidden,1)

    def forward(self,X,future=0):
        """Run the network over ``X`` and optionally keep predicting.

        Args:
            X: Tensor of shape ``(n_samples, seq_len)``; every column is one
                time step. It is moved to the model's device if needed.
            future: Number of additional steps to generate after the end of
                ``X`` by feeding each prediction back in as the next input.

        Returns:
            Tensor of shape ``(n_samples, seq_len + future)`` with one output
            per input step followed by the ``future`` generated steps.

        Raises:
            ValueError: If ``X`` contains no time steps (there would be nothing
                to concatenate and nothing to seed the ``future`` steps with).
        """
        if X.size(1) == 0:
            raise ValueError('X must contain at least one time step')

        # Take device and dtype from the parameters so the model keeps working
        # after .to(...)/.cpu()/.double(), and move the input only once instead
        # of issuing .to(device) calls on every time step.
        weight = self.linear.weight
        X = X.to(weight.device)
        state_shape = (X.size(0),self.n_hidden)
        h_t = torch.zeros(state_shape,dtype=weight.dtype,device=weight.device)
        c_t = torch.zeros(state_shape,dtype=weight.dtype,device=weight.device)
        h_t2 = torch.zeros(state_shape,dtype=weight.dtype,device=weight.device)
        c_t2 = torch.zeros(state_shape,dtype=weight.dtype,device=weight.device)

        outputs = []
        # Teacher-forced pass: one (n_samples, 1) column of X per step.
        for input_t in X.split(1,dim=1):
            h_t,c_t = self.lstm1(input_t,(h_t,c_t))
            h_t2,c_t2 = self.lstm2(h_t,(h_t2,c_t2))
            output = self.linear(h_t2)
            outputs.append(output)
        # Free-running pass: the previous prediction becomes the next input.
        for _ in range(future):
            h_t,c_t = self.lstm1(output,(h_t,c_t))
            h_t2,c_t2 = self.lstm2(h_t,(h_t2,c_t2))
            output = self.linear(h_t2)
            outputs.append(output)
        return torch.cat(outputs,dim=1)

In [16]:
# Number of passes of the outer training loop.
epochs = 18

In [17]:
# Batch size equal to the length of the price series at this point, i.e. the
# training loops step through their data in a single batch.
batch_size = len(y)

In [18]:
# The CSV lists the newest day first (see the data preview near the top), so
# flip the series into chronological order. The model has to be trained
# oldest -> newest for its `future` steps to extrapolate forward in time
# instead of further into the past.
series = torch.from_numpy(np.array(y)).flip(0).view(1,-1).float().to(device)
# Next-step prediction pairs: the target is the input shifted one step ahead.
# (The former `[:3` row slice was a no-op on this one-row tensor.)
data_input = series[:,:-1]
data_target = series[:,1:]

In [19]:
# Build the network on the target device; the bare name on the last line makes
# the notebook display its layer summary.
model = LSTMPredictor().to(device)
model

LSTMPredictor(
  (lstm1): LSTMCell(1, 512)
  (lstm2): LSTMCell(512, 512)
  (linear): Linear(in_features=512, out_features=1, bias=True)
)

In [20]:
# Mean-squared error; used below to compare model outputs with data_target.
criterion = MSELoss()

In [21]:
# L-BFGS over all model parameters. The training loop passes a closure to
# optimizer.step() so the optimizer can re-evaluate the loss itself.
optimizer = LBFGS(model.parameters(),lr=0.8)

In [None]:
# Baseline run: train on the raw price series and, after every epoch, log a
# plot of the series plus a `future`-step forecast to Weights & Biases.
wandb.init(project=PROJECT_NAME,name='baseline')
# Number of steps to forecast past the end of the training series.
future = 125
# savefig does not create missing directories.
os.makedirs('./preds',exist_ok=True)
try:
    for _ in tqdm(range(epochs)):
        # Batches are slices over the rows of data_input (one row here, so a
        # single batch per epoch).
        for start in range(0,data_input.size(0),batch_size):
            y_batch_input = data_input[start:start+batch_size]
            y_batch_target = data_target[start:start+batch_size]
            torch.cuda.empty_cache()
            def closure():
                # Called (possibly several times) by L-BFGS to re-evaluate the
                # loss and its gradients on the current batch.
                optimizer.zero_grad()
                out = model(y_batch_input)
                loss = criterion(out,y_batch_target)
                wandb.log({'loss':loss.item()})
                loss.backward()
                return loss
            optimizer.step(closure)
            with torch.no_grad():
                pred = model(data_input,future=future)
                # Only the teacher-forced part has a ground truth to compare to.
                loss = criterion(pred[:,:-future],data_target)
                wandb.log({'Val Loss':loss.item()})
                # Stored under its own name: rebinding the global `y` here would
                # replace the price series that later cells still rely on.
                pred_np = pred.cpu().numpy()
        n = data_input.shape[1]
        plt.figure(figsize=(12,6))
        # Solid line: training targets; dotted line: forecast beyond the data.
        plt.plot(np.arange(n),data_target.cpu().view(-1),'r')
        plt.plot(np.arange(n,n+future),pred_np[0,n:],'r:')
        plt.savefig('./preds/img.png')
        plt.close()
        # Log the saved file directly: cv2.imread returns BGR data, which shows
        # up with red and blue swapped when handed to wandb.Image as an array.
        wandb.log({'Img':wandb.Image('./preds/img.png')})
finally:
    # Close the run even when training is interrupted or fails.
    wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33mranuga-d[0m (use `wandb login --relogin` to force relogin)


 56%|█████▌    | 10/18 [05:18<04:21, 32.75s/it]

In [None]:
# Candidate scalers; the loop below fits each one to the price series and
# trains a fresh model on the transformed data.
# NOTE(review): none of these names is imported in the import cell at the top.
# They look like scikit-learn's sklearn.preprocessing classes, so this cell
# raises NameError until that import is added -- confirm and add it.
# NOTE(review): if Normalizer is sklearn's, it rescales each row to unit norm;
# the loop feeds one-column rows, which would turn every non-zero value into
# +/-1 -- confirm this entry is intended.
pres = [StandardScaler(),RobustScaler(),MinMaxScaler(),MaxAbsScaler(),Normalizer()]

In [None]:
# Keep a handle on the current series so every scaler in the loop below starts
# from the same data.
y_old = y

In [None]:
# For every candidate scaler: scale the series, train a fresh model on it and
# log losses plus a per-epoch forecast plot to its own Weights & Biases run.
# Number of steps to forecast past the end of the training series.
future = 100
# savefig does not create missing directories.
os.makedirs('./preds',exist_ok=True)
for pre in pres:
    # Always start from the saved series; one column because the scalers expect
    # 2-D (n_samples, n_features) input.
    y_raw = np.asarray(y_old,dtype=np.float64).reshape(-1,1)
    y_scaled = pre.fit_transform(y_raw)
    # The CSV is ordered newest-first, so flip to chronological order before
    # building the next-step pairs (target = input shifted one step ahead).
    series = torch.from_numpy(y_scaled).flip(0).view(1,-1).float().to(device)
    data_input = series[:,:-1]
    data_target = series[:,1:]
    model = LSTMPredictor().to(device)
    criterion = MSELoss()
    optimizer = LBFGS(model.parameters(),lr=0.8)
    # The baseline cell finishes its run, so each scaler needs a run of its own
    # before wandb.log can be called again.
    wandb.init(project=PROJECT_NAME,name=type(pre).__name__)
    try:
        for _ in tqdm(range(epochs)):
            # Batches are slices over the rows of data_input (one row here).
            for start in range(0,data_input.size(0),batch_size):
                y_input_batch = data_input[start:start+batch_size]
                y_target_batch = data_target[start:start+batch_size]
                def closure():
                    # Re-evaluated by L-BFGS as often as it needs.
                    optimizer.zero_grad()
                    preds = model(y_input_batch)
                    loss = criterion(preds,y_target_batch)
                    wandb.log({'Loss':loss.item()})
                    loss.backward()
                    return loss
                optimizer.step(closure)
                with torch.no_grad():
                    preds = model(data_input,future)
                    # Drop the `future` extra columns: only the teacher-forced
                    # part has targets, and the shapes must match for the loss.
                    loss = criterion(preds[:,:-future],data_target)
                    wandb.log({'Val Loss':loss.item()})
                    test_preds = preds.cpu().numpy()
            n = data_input.shape[1]
            plt.figure(figsize=(12,6))
            # Solid line: training targets; dotted line: forecast beyond the data.
            plt.plot(np.arange(n),data_target.cpu().view(-1),'r')
            plt.plot(np.arange(n,n+future),test_preds[0,n:],'r:')
            plt.savefig('./preds/img.png')
            plt.close()
            # Log the saved file directly: cv2.imread returns BGR data, which
            # shows up with red and blue swapped when passed as an array.
            wandb.log({'Img':wandb.Image('./preds/img.png')})
    finally:
        # Close this scaler's run even if training is interrupted or fails.
        wandb.finish()