In [None]:
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

plt.style.use('ggplot')
import warnings
warnings.filterwarnings(action='ignore')

from tqdm import tqdm
from torch.utils.data import SubsetRandomSampler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import RobustScaler
from torch.utils.data import Dataset,DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Load only the index column ('Unnamed: 0') and `close` from the BTC price file.
# parse_dates=True converts the index to datetimes. The former infer_datetime_format=True
# had no effect on its own (it only tunes parsing that parse_dates enables) and is
# deprecated in pandas 2.x, so the index was silently left as plain strings.
# NOTE(review): assumes 'Unnamed: 0' holds timestamps, as the date-string filters on the
# index in the following cells suggest — confirm against the CSV.
df = pd.read_csv('../input/5-coin/BTC.csv',index_col='Unnamed: 0',usecols=['Unnamed: 0','close'],parse_dates=True)
df

In [None]:
# Print a summary of the loaded frame (index, column dtypes, non-null counts).
df.info()

In [None]:
# Keep only rows whose index falls in the half-open range ['2019-09-07', '2022-05-01').
df = df[(df.index >= '2019-09-07') & (df.index < '2022-05-01')]
df

In [None]:
# Count missing values per column.
df.isna().sum()

In [None]:
# Keep `close` as a one-column DataFrame (double brackets return a frame, not a Series).
df = df[['close']]
df

In [None]:
def make_dataset(data, label, window_size=24):
    """Slice a series into fixed-length windows paired with one label each.

    For every start position ``i`` in ``range(len(data) - window_size)`` the
    feature is ``data.iloc[i:i + window_size]`` and the label is
    ``label.iloc[i + window_size]``.

    Args:
        data: pandas object supporting ``.iloc`` slicing; source of the windows.
        label: pandas object supporting ``.iloc``; source of the labels.
        window_size: number of consecutive rows per window.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays, one entry per window.
    """
    n_windows = len(data) - window_size
    starts = range(n_windows)
    windows = [np.array(data.iloc[start:start + window_size]) for start in starts]
    targets = [np.array(label.iloc[start + window_size]) for start in starts]
    return np.array(windows), np.array(targets)

In [None]:
scaler = RobustScaler()
# Fit the scaler on the training portion only — every row except the final 48, which a
# later cell holds out as `test` — so test-period statistics do not leak into the scaling.
# The fitted transform is then applied to the whole series.
close_values = df[['close']].to_numpy()
scaler.fit(close_values[:-48])
# ravel() turns the (n, 1) result back into a 1-D column.
df['close'] = scaler.transform(close_values).ravel()

In [None]:
# Plot the close series (scaled by the cell above) on a wide 25x10 figure.
df.close.plot(figsize=(25,10))

In [None]:
# Hold out the final 48 rows as the test set; all earlier rows form the training set.
train, test = df.iloc[:-48], df.iloc[-48:]
print(train.shape, test.shape)

In [None]:
# Show the first rows of each split as a quick sanity check.
print('Train')
display(train.head())
print('Test')
display(test.head())

In [None]:
# Two views of `train` offset by 24 rows.
# NOTE(review): both names are reassigned by the make_dataset call in the next cell,
# so this assignment has no lasting effect.
x_train, y_train = train.iloc[:-24],train.iloc[24:]

In [None]:
# Build sliding windows: each sample is 24 consecutive rows of train.iloc[:-24] and its
# label is a single row taken from train.iloc[24:].
# NOTE(review): the label frame is already shifted by 24 rows and make_dataset indexes it
# at i + window_size, so sample i (rows i..i+23 of `train`) is paired with row i+48 of
# `train` — confirm this gap is intended.
x_train,y_train = make_dataset(train.iloc[:-24],train.iloc[24:],window_size=24)
print(x_train.shape,y_train.shape)

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset over pre-built (window, label) arrays.

    Args:
        data: indexable collection of feature windows (e.g. a NumPy array whose
            first axis enumerates samples).
        label: indexable collection of labels aligned with ``data``.

    ``__getitem__`` returns ``(data, label)`` as float64 tensors.
    """
    def __init__(self,data,label):
        self.data = data
        self.label = label

    def __len__(self):
        return len(self.data)

    def __getitem__(self,idx):
        # The values are converted as-is. Earlier code refitted the module-level scaler on
        # every window, which overwrote the scaler fitted on the full series and rescaled
        # each window independently of its label.
        data = torch.tensor(self.data[idx],dtype=torch.float64)
        # Labels are real-valued regression targets; an integer dtype would truncate them.
        label = torch.tensor(self.label[idx],dtype=torch.float64)
        return data,label

In [None]:
class Encoder(nn.Module):
    """Stack of three pointwise Conv1d + BatchNorm1d + Hardswish stages.

    The first convolution has 24 input channels, so the input must be shaped
    ``(batch, 24, length)``. All kernels have size 1, so ``length`` is preserved
    (dilation has no effect with a size-1 kernel). The result is permuted to
    ``(length, batch, 512)``.

    ``input_size`` and ``hidden_size`` are stored on the instance but do not
    influence the layer sizes.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Channel progression: 24 -> 128 -> 256 -> 512.
        self.conv1 = nn.Conv1d(in_channels=24, out_channels=128, kernel_size=1, dilation=1)
        self.bn1 = nn.BatchNorm1d(num_features=128)
        self.conv2 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=1, dilation=2)
        self.bn2 = nn.BatchNorm1d(num_features=256)
        self.conv3 = nn.Conv1d(in_channels=256, out_channels=512, kernel_size=1, dilation=1)
        self.bn3 = nn.BatchNorm1d(num_features=512)
        self.swish = nn.Hardswish()

    def forward(self, input):
        """Run the three conv stages and return a ``(length, batch, 512)`` tensor."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        features = input
        for conv, norm in stages:
            features = self.swish(norm(conv(features)))
        # (batch, channels, length) -> (length, batch, channels)
        return features.permute((2, 0, 1))

In [None]:
class Decoder(nn.Module):
    """GRU decoder that maps one scalar input per sample to one scalar output.

    Layer sizes are fixed: the GRU takes 1 feature and has a single layer with
    512 hidden units (the channel count produced by the encoder's last
    convolution), followed by a 512 -> 1 linear projection. ``input_size`` and
    ``hidden_size`` are stored on the instance but do not affect the layers.
    """
    def __init__(self,input_size,hidden_size):
        super(Decoder,self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.gru1 = nn.GRU(1,512,1,batch_first=True)
        self.fc = nn.Linear(512,1)

    def forward(self,input,encoder_hidden):
        """Advance the GRU and project its output.

        Args:
            input: tensor to which a trailing feature axis is appended before the
                GRU; a ``(batch, 1)`` tensor becomes ``(batch, 1, 1)``.
            encoder_hidden: initial GRU hidden state, ``(1, batch, 512)``.

        Returns:
            Tuple ``(output, hidden)``: the projected GRU output and the new
            hidden state (also kept on ``self.hidden``).
        """
        # The layer is registered as `gru1`; referring to `self.gru` raised AttributeError.
        gru_output, self.hidden = self.gru1(input.unsqueeze(-1),encoder_hidden)
        output = self.fc(gru_output)
        return output, self.hidden

In [None]:
class AutoEncoder(nn.Module):
    """Encoder/decoder forecaster that emits ``target_len`` steps autoregressively.

    The encoder output is used as the decoder's initial hidden state; the first
    decoder input is the last step of the input window, and each later input is
    either the previous prediction or (with teacher forcing) the ground truth.
    """
    def __init__(self,input_size=1,hidden_size=64):
        super(AutoEncoder,self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.encoder = Encoder(input_size,hidden_size)
        self.decoder = Decoder(input_size,hidden_size)

    def _decode(self,input,target_len,target=None,tf_ratio=0.0):
        """Shared decoding loop used by ``forward`` and ``predict``."""
        batch_size = input.shape[0]
        input_size = input.shape[2]

        # Allocate on the input's device so no CPU<->GPU copy happens per step.
        outputs = torch.zeros(batch_size,target_len,input_size,device=input.device)
        hidden = self.encoder(input)
        decoder_input = input[:,-1,:]
        for t in range(target_len):
            output, hidden = self.decoder(decoder_input,hidden)
            output = output.squeeze(1)
            outputs[:,t,:] = output

            # Teacher forcing: with probability tf_ratio feed the ground truth for
            # step t instead of the model's own prediction. No random draw is made
            # when no target is supplied.
            if target is not None and torch.rand(1) < tf_ratio:
                decoder_input = target[:,t,:]
            else:
                decoder_input = output
        return outputs

    def forward(self,input,target,target_len,tf_ratio):
        """Decode ``target_len`` steps with teacher forcing.

        Args:
            input: 3-D tensor indexed as ``(batch, steps, features)``.
            target: 3-D tensor indexed as ``(batch, target_len, features)``.
            target_len: number of steps to generate.
            tf_ratio: per-step probability of feeding the ground truth.

        Returns:
            Tensor of shape ``(batch, target_len, features)`` on ``input``'s device.
        """
        return self._decode(input,target_len,target=target,tf_ratio=tf_ratio)

    def predict(self, inputs, target_len):
        """Forecast ``target_len`` steps for a single unbatched window.

        Switches the module to eval mode (and leaves it there), adds the batch
        axis, decodes without tracking gradients, and returns the first feature
        of the forecast as a 1-D NumPy array.
        """
        self.eval()
        with torch.no_grad():
            outputs = self._decode(inputs.unsqueeze(0), target_len)
        # .cpu() is required before .numpy() when the model runs on a GPU.
        return outputs.detach().cpu().numpy()[0,:,0]

In [None]:
# Instantiate the model with its default constructor arguments; the bare name on the
# last line displays the module's layer summary.
model = AutoEncoder()
model

In [None]:
# Training configuration.
# `epochs` is the upper bound on training epochs; the training cell may stop earlier.
epochs = 1000
lr = 3e-4
# Read by the training cell to decide how often the loss is printed.
verbose = 10
# The optimizer holds references to the parameters of whichever `model` object exists
# when this cell runs; if `model` is later rebound to a new instance, the optimizer
# must be rebuilt for it.
optimizer = torch.optim.AdamW(model.parameters(),lr=lr,weight_decay=0.1)
# Prefer the GPU when one is available.
device = ('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Halve the learning rate (factor=0.5) when the value passed to scheduler.step() has not
# improved for `patience` consecutive calls, never going below min_lr.
# NOTE(review): mode and threshold_mode are left at their defaults; if those are
# 'min' / 'rel', threshold=0.6 means a value only counts as an improvement when it is at
# least 60% lower than the best seen so far — confirm this is intended.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    factor=0.5,
    patience=20,
    cooldown=1,
    min_lr=1e-4,
    verbose=1,
    threshold=0.6
)

In [None]:
# from tqdm import tqdm
# model.to(device)
# model.train()
# with tqdm(range(epochs)) as tr:
#     for i in tr:
#         total_loss = 0.0
#         for x,y in train_loader:
#             x = x.to(device).float()
#             y = y.to(device).float()
#             optimizer.zero_grad()
#             logits = model(x,y,ow,0.6).to(device)
#             loss = F.smooth_l1_loss(logits,y)
#             scheduler.step(loss)
#             loss.backward()
#             optimizer.step()
#             total_loss += loss.cpu().item()
            
#         tr.set_postfix('Loss:{:.3f}'.format(total_loss/len(train_loader)))
        

In [None]:
# Train the model the optimizer and scheduler were built for. Creating a fresh
# AutoEncoder() here would leave the optimizer updating the previous instance's
# parameters, so the new model would never learn.
# NOTE(review): `train_loader` and `ow` (number of steps to decode) are not defined in
# any visible cell — confirm they are created before this cell runs.
model.to(device)
total_loss = []      # mean training loss of each completed epoch
min_loss = np.inf    # np.Inf was removed in NumPy 2.0
patience = 0         # epochs since the best loss last improved
for e in range(epochs):
    # predict() leaves the model in eval mode, so re-enable training mode every epoch.
    model.train()
    batch_losses = []
    for x,y in tqdm(train_loader):
        x,y = x.to(device).float(),y.to(device).float()

        optimizer.zero_grad()
        logits = model(x,y,ow,0.6).to(device)
        loss = F.smooth_l1_loss(logits,y)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Average over every batch of the epoch, not just the last one.
    epoch_loss = float(np.mean(batch_losses))
    scheduler.step(epoch_loss)
    total_loss.append(epoch_loss)

    # Always keep the most recent weights.
    torch.save(model.state_dict(),'cnn2rnn.pt')
    # Log every `verbose` epochs without mutating `verbose` itself.
    if (e + 1) % verbose == 0:
        print('Epoch:{}\tLoss:{:.5f}'.format(e+1,epoch_loss))
    if epoch_loss < min_loss:
        print('Loss decreased {:.5f} ---> {:.5f} so saving the model'.format(min_loss,epoch_loss))
        # Separate checkpoint holding the best weights seen so far.
        torch.save(model.state_dict(),'seq2seq.pt')
        min_loss = epoch_loss
        patience = 0
    else:
        patience += 1
        print(f'patience:{patience}')
        # Early stopping after 10 consecutive epochs without improvement.
        if patience == 10:
            print(f'Best loss:{min_loss}')
            break
    

In [None]:
# Restore the weights written by the training loop at the end of each epoch. The file is
# saved as 'cnn2rnn.pt' (lower case); the upper-case name used before does not exist on
# case-sensitive filesystems. The best-loss weights are stored separately in 'seq2seq.pt'.
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model.load_state_dict(torch.load('./cnn2rnn.pt',map_location=device))

In [None]:
# Forecast from the last 24 rows of `train`, reshaped to (24, 1); predict() adds the
# batch axis itself and returns a 1-D NumPy array.
# NOTE(review): `ow` (the forecast length) is not defined in any visible cell — confirm
# where it is set.
pred = model.predict(torch.tensor(np.array(train[-24:])).reshape(-1,1).to(device).float(), target_len=ow)