In [1]:
# Data related variables, treatment, loading and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

# Just to time the training of the model
import time

# Pytorch libraries to create and train the model
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchmetrics import MeanAbsoluteError, MeanAbsolutePercentageError

# Remove Warnings 
import warnings
warnings.filterwarnings('ignore')

""" \
Our transformer model will have an input size of (32, 128, 5) \
32 -> batch size \
128 -> sequence length \
5 -> number of features (Open, Low, High, Close) \
"""

In [2]:
# the features are separated into sequences of 128 days, 4 price features and 1 volume feature
seq_len = 128
# target_len = 3

# during a single step the model receives 32 sequences
batch_size = 32

In [3]:
# Stock for the deep learning
ticker = "GOOGL"

In [4]:
# Importing dataset (use the function in data/data.py)
df = pd.read_csv(
    "data/data.csv", usecols=["Date", "High", "Low", "Open", "Close", "Volume"]
)

In [5]:
# Avoid dividing by 0
df["Volume"].replace(to_replace=0, method="ffill", inplace=True)

# Sort the values based on date
df.sort_values("Date", inplace=True)

# Apply moving average with a window of 10 days to all columns
df[['Open', 'High', 'Low', 'Close', 'Volume']] = df[['Open', 'High', 'Low', 'Close', 'Volume']].rolling(10).mean() 

df.tail()

Unnamed: 0,Date,High,Low,Open,Close,Volume
6661,2022-11-14,92.391,89.701999,90.684,91.674001,37950530.0
6662,2022-11-15,93.741,91.001999,91.97,92.860001,37004050.0
6663,2022-11-16,95.041,92.394999,93.22,94.087001,34804010.0
6664,2022-11-17,96.079,93.388999,94.189001,95.074001,34001380.0
6665,2022-11-18,96.937,94.267,95.176,95.927001,33791360.0


In [6]:
df["Open"] = df["Open"].pct_change()  # Create arithmetic returns column
df["High"] = df["High"].pct_change()  # Create arithmetic returns column
df["Low"] = df["Low"].pct_change()  # Create arithmetic returns column
df["Close"] = df["Close"].pct_change()  # Create arithmetic returns column
df["Volume"] = df["Volume"].pct_change()

# Drop the rows with the NaN created by the percentage change
df.dropna(how="any", axis=0, inplace=True)

In [7]:
# Get the values to create the separation of the dataset
times = sorted(df.index.values)
last_10pct = sorted(df.index.values)[-int(0.1 * len(times))]
last_20pct = sorted(df.index.values)[-int(0.2 * len(times))]

In [8]:
# min-max price columns
min_return = min(
    df[(df.index < last_20pct)][["Open", "High", "Low", "Close"]].min(axis=0)
)
max_return = max(
    df[(df.index < last_20pct)][["Open", "High", "Low", "Close"]].max(axis=0)
)

# Min-max normalize price columns (0-1 range)
df["Open"] = (df["Open"] - min_return) / (max_return - min_return)
df["High"] = (df["High"] - min_return) / (max_return - min_return)
df["Low"] = (df["Low"] - min_return) / (max_return - min_return)
df["Close"] = (df["Close"] - min_return) / (max_return - min_return)

In [9]:
# min-max volume column
min_volume = df[(df.index < last_20pct)]["Volume"].min(axis=0)
max_volume = df[(df.index < last_20pct)]["Volume"].max(axis=0)

# Min-max normalize volume columns (0-1 range)
df["Volume"] = (df["Volume"] - min_volume) / (max_volume - min_volume)

In [10]:
df_train = df[(df.index < last_20pct)]  # Training data are 80% of total data
df_val = df[(df.index >= last_20pct) & (df.index < last_10pct)]
df_test = df[(df.index >= last_10pct)]

# Drop the date column from the splitted datasets
df_train.drop(columns=["Date"], inplace=True)
df_val.drop(columns=["Date"], inplace=True)
df_test.drop(columns=["Date"], inplace=True)

# Train data into arrays np.ndarray
train_data = df_train.values
val_data = df_val.values
test_data = df_test.values
print(f"Training data shape: {train_data.shape}")
print(f"Validation data shape: {val_data.shape}")
print(f"Test data shape: {test_data.shape}")

df_train.head()

Training data shape: (5325, 5)
Validation data shape: (666, 5)
Test data shape: (665, 5)


Unnamed: 0,High,Low,Open,Close,Volume
10,0.533315,0.624841,0.594562,0.555345,0.137539
11,0.403434,0.485776,0.530637,0.358239,0.249561
12,0.374853,0.488245,0.481612,0.363512,0.223069
13,0.362567,0.446667,0.488121,0.328155,0.24176
14,0.281407,0.291742,0.271953,0.329953,0.364811


In [11]:
# df_train.T#[:,0:128]

In [12]:
# encoder_layer = nn.TransformerEncoderLayer(d_model=128, nhead=8)
# src = torch.rand(10, 5, 128)
# out = encoder_layer(src)

In [13]:
# src.size()

# Dataset

In [14]:
class TickerData(Dataset):
    def __init__(self, data: np.ndarray, seq_len: int) -> None:
        """Init function of dataset class

        Args:
            data (np.ndarray): data from the dataframe to numpy
            seq_len (int): len of values to base for the prediction
        """
        self.inputs, self.targets = [], []

        for i in range(seq_len, len(data)):
            # Chunks of  data with a length of 128 df-rows
            self.inputs.append(data[i-seq_len:i])
            
            # Value of 4th column (Close Price) of df-row 128+1
            self.targets.append(data[:, 3][i])
        
        self.inputs, self.targets = torch.FloatTensor(np.array(self.inputs)), torch.FloatTensor(np.array(self.targets))
        # print(self.inputs)
        # print(self.inputs.shape)
        
    def __getitem__(self, idx: int) -> dict:
        """Get item at a certain index

        Args:
            idx (int): index to get the value

        Returns:
            dict: returns the input and the target
        """
        return {'inputs': self.inputs[idx],
                'targets': self.targets[idx]}
    
    def __len__(self) -> int:
        """Length function 

        Returns:
            int: length of the Dataset
        """
        return min(len(self.inputs), len(self.targets))

In [15]:
train_data = TickerData(data=train_data, seq_len=seq_len)
val_data = TickerData(data=val_data, seq_len=seq_len)
test_data = TickerData(data=test_data, seq_len=seq_len)

In [16]:
len(train_data)

5197

# DataLoader

In [17]:
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=False, drop_last=True)
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False, drop_last=True)

In [18]:
# for _,batch in enumerate(train_loader):
#     x = batch["inputs"]
#     break

In [19]:
# x

In [20]:
# x.swapaxes(1,2)

# Device

In [21]:
# This concept is also called teacher forceing. 
# The flag decides if the loss will be calculted over all 
# or just the predicted values.
calculate_loss_over_all_values = False

# S is the source sequence length
# T is the target sequence length
# N is the batch size
# E is the feature number

#src = torch.rand((10, 32, 512)) # (S,N,E) 
#tgt = torch.rand((20, 32, 512)) # (T,N,E)
#out = transformer_model(src, tgt)
#
#print(out)

# input_window = 100
# output_window = 5
# batch_size = 10 # batch size
device = torch.device("cpu" if torch.backends.mps.is_available() else "cpu")

In [22]:
device

device(type='cpu')

# Model

In [23]:
# class PositionalEncoding(nn.Module):

#     def __init__(self, d_model, max_len=5000):
#         super(PositionalEncoding, self).__init__()       
#         pe = torch.zeros(max_len, d_model)
#         position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
#         div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
#         pe[:, 0::2] = torch.sin(position * div_term)
#         pe[:, 1::2] = torch.cos(position * div_term)
#         pe = pe.unsqueeze(0).transpose(0, 1)
#         #pe.requires_grad = False
#         self.register_buffer('pe', pe)

#     def call(self, x):
#         # Add extra dimension to self.pe
#         self.pe = self.pe.unsqueeze(0)
#         return x + self.pe[:x.size(0), :]


#     def forward(self, x):
#         print(x)
#         # print(x.size())
#         print(self.pe)
#         # print(self.pe.size())
#         # self.pe = self.pe.unsqueeze(0)
#         return x + self.pe[:x.size(0), :]


# class T2V(nn.Module):
    
#     def __init__(self, output_dim=None):
#         super(T2V, self).__init__()
#         self.output_dim = output_dim
        
#         self.W = nn.Parameter(torch.Tensor(input_shape[-1], self.output_dim))
#         nn.init.uniform_(self.W)
        
#         self.P = nn.Parameter(torch.Tensor(input_shape[1], self.output_dim))
#         nn.init.uniform_(self.P)
        
#         self.w = nn.Parameter(torch.Tensor(input_shape[1], 1))
#         nn.init.uniform_(self.w)
        
#         self.p = nn.Parameter(torch.Tensor(input_shape[1], 1))
#         nn.init.uniform_(self.p)

#     def forward(self, x):
#         original = self.w * x + self.p
#         sin_trans = torch.sin(torch.matmul(x, self.W) + self.P)
        
#         return torch.cat([sin_trans, original], dim=-1)

# class Time2Vector(nn.Module):
#   def __init__(self, seq_len):
#     super(Time2Vector, self).__init__()
#     self.seq_len = seq_len

#     # Initialize weights and biases with shape (seq_len)
#     self.weights_linear = nn.Parameter(torch.Tensor(seq_len))
#     self.bias_linear = nn.Parameter(torch.Tensor(seq_len))
#     self.weights_periodic = nn.Parameter(torch.Tensor(seq_len))
#     self.bias_periodic = nn.Parameter(torch.Tensor(seq_len))

#   def forward(self, x):
#     '''Calculate linear and periodic time features'''
#     # print(x[:,:,:4])
#     x = torch.mean(x[:,:,:4], dim=-1) 
#     # print(x)
#     time_linear = self.weights_linear * x + self.bias_linear # Linear time feature
#     time_linear = time_linear.unsqueeze(-1) # Add dimension (batch, seq_len, 1)
    
#     time_periodic = torch.sin(x * self.weights_periodic + self.bias_periodic)
#     time_periodic = time_periodic.unsqueeze(-1) # Add dimension (batch, seq_len, 1)
#     return torch.nan_to_num(torch.cat([time_linear, time_periodic], dim=-1).swapaxes(1,2)) # shape = (batch, seq_len, 2)

class Time2Vector(nn.Module):
    def __init__(self, seq_len):
        super(Time2Vector, self).__init__()
        self.seq_len = seq_len

        self.weights_linear = nn.Parameter(torch.Tensor(int(self.seq_len)))
        nn.init.uniform_(self.weights_linear)

        self.bias_linear = nn.Parameter(torch.Tensor(int(self.seq_len)))
        nn.init.uniform_(self.bias_linear)

        self.weights_periodic = nn.Parameter(torch.Tensor(int(self.seq_len)))
        nn.init.uniform_(self.weights_periodic)

        self.bias_periodic = nn.Parameter(torch.Tensor(int(self.seq_len)))
        nn.init.uniform_(self.bias_periodic)

    def forward(self, x):
        x = torch.mean(x[:,:,:4], dim=-1)
        time_linear = self.weights_linear * x + self.bias_linear
        time_linear = time_linear.unsqueeze(-1)

        time_periodic = torch.sin(x * self.weights_periodic + self.bias_periodic)
        time_periodic = time_periodic.unsqueeze(-1)

        return torch.cat([time_linear, time_periodic], dim=-1)

class TransAm(nn.Module):
    def __init__(self, feature_size=128, num_layers=1, dropout=0.0):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        
        self.src_mask = None
        # self.pos_encoder = PositionalEncoding(feature_size)
        self.t2v = Time2Vector(128)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=128, nhead=8, batch_first=True, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)     
        self.decoder = nn.Linear(128,1)
        self.init_weights()

    def init_weights(self):
        initrange = 0.1    
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self,src):
        
        if self.src_mask is None or self.src_mask.size(0) != 7:
            device = src.device
            mask = self._generate_square_subsequent_mask(7).to(device)
            self.src_mask = mask
        # print(src.size())
        # src = self.pos_encoder(src)
        # time2vector = self.t2v(src)
        # print(torch.count_nonzero(torch.isnan(time2vector)), time2vector)
        # print(torch.count_nonzero(torch.isnan(src.swapaxes(1,2))),src.swapaxes(1,2))
        # src = torch.concatenate((self.t2v(src), src.swapaxes(1,2)), axis=1)
        t2v = self.t2v(src)
        # print(t2v.size())
        src = torch.concatenate((t2v, src), axis=2)
        # print(torch.isnan(src).any())
        # print(src.size())
        # print(src.size())
        # output = self.transformer_encoder(src.swapaxes(1,2),self.src_mask)#, self.src_mask)
        output = self.transformer_encoder(src.swapaxes(1,2), self.src_mask)
        # print(output.size())
        output = self.decoder(output[:, -1, :])
        return output

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

In [24]:
# encoder_layer = nn.TransformerEncoderLayer(d_model=128, nhead=8, batch_first=True)
# transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
# src = torch.rand(32, 5, 128)
# out = transformer_encoder(src)

In [25]:
model = TransAm().to(device)

criterion = nn.MSELoss()

lr = 0.005 
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.98)



In [26]:
# for i, batch in enumerate(train_loader):
#     # data, targets = get_batch(train_data, i,batch_size)
#     data, targets = batch['inputs'], batch['targets']
#     print(targets)
#     print(targets[-1:])

In [27]:
prev_loss = np.Inf

In [28]:
def train_model(train_data, prev_loss):
    model.train() # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    mean_absolute_error = MeanAbsoluteError()
    mean_absolute_percentage_error = MeanAbsolutePercentageError()

    total_mae = 0.
    total_mape = 0.

    for i, batch in enumerate(train_loader):
        # data, targets = get_batch(train_data, i,batch_size)
        data, targets = batch['inputs'].to(device), batch['targets'].to(device).unsqueeze(1)
        optimizer.zero_grad()
        # print(data.size())
        output = model(data)  
        # print(output)
        # if calculate_loss_over_all_values:
        #     loss = criterion(output)
        # else:
        loss = criterion(output, targets)
    
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        log_interval = int(len(train_data) / batch_size / 5)

        mae = mean_absolute_error(output, targets)
        mape = mean_absolute_percentage_error(output, targets)
        if i % log_interval == 0 and i > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            # print(f"| epoch: {epoch} | batch: {i}/{len(train_data) // batch_size} | lr: {round(scheduler.get_lr()[0], 6)} | {round(elapsed * 1000 / log_interval, 2)} ms | loss: {round(cur_loss, 5)} | ppl: {round(math.exp(cur_loss), 2)} | MAE: {round(mae.item(), 4)} | MAPE: {round(mape.item(), 4)}")
            print('| epoch {:3d} | {:5d}/{:5d} batches | '
                  'lr {:02.6f} | {:5.2f} ms | '
                  'loss {:5.5f} | ppl {:8.2f} | '
                  'MAE: {:5.5f} | MAPE: {:5.5}'.format(
                    epoch, i, len(train_data) // batch_size, scheduler.get_lr()[0],
                    elapsed * 1000 / log_interval,
                    cur_loss, math.exp(cur_loss),
                    mae.item(), mape.item()))
            # print()

            total_loss = 0
            start_time = time.time()

        total_mae += mae
        total_mape += mape
            
        if loss < prev_loss:
            
            torch.save(model.state_dict(), 'tranformer_models/state_dict.pt')  # save model state (wights, etc.)
            torch.save(model, 'tranformer_models/model_complete.pt')  # save complete model
            prev_loss = loss

    return prev_loss, total_mae / len(train_loader), total_mape / len(train_loader)


In [36]:
type(prev_loss)

torch.Tensor

In [29]:
# x = torch.tensor([1, 2, np.nan])
# torch.isnan(x).any()

In [30]:
def plot_and_loss(eval_model, data_source,epoch):
    eval_model.eval() 
    total_loss = 0.
    test_result = torch.Tensor(0)    
    truth = torch.Tensor(0)
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            data, target = batch['inputs'].to(device), batch["targets"].to(device).unsqueeze(1)
            # look like the model returns static values for the output window
            output = eval_model(data)    
            if calculate_loss_over_all_values:                                
                total_loss += criterion(output, target).item()
            else:
                total_loss += criterion(output[-1:], target[-1:]).item()
            
            test_result = torch.cat((test_result, output[-1].view(-1).cpu()), 0) #todo: check this. -> looks good to me
            truth = torch.cat((truth, target[-1].view(-1).cpu()), 0)
            
    #test_result = test_result.cpu().numpy()
    len(test_result)

    plt.plot(test_result,color="red")
    plt.plot(truth[:500],color="blue")
    plt.plot(test_result-truth,color="green")
    plt.grid(True, which='both')
    plt.axhline(y=0, color='k')
    plt.savefig('graph/transformer-epoch%d.png'%epoch)
    plt.close()
    
    return total_loss / i

In [31]:
# for _, batch in enumerate(val_loader):
#     # print(batch["targets"])
#     data, targets = batch['inputs'], batch["targets"]
#     print(data.size())

In [32]:
# def predict_future(eval_model, data_source,steps):
#     eval_model.eval() 
#     total_loss = 0.
#     test_result = torch.Tensor(0)    
#     truth = torch.Tensor(0)
#     _ , data = get_batch(data_source, 0,1)
#     with torch.no_grad():
#         for i in range(0, steps,1):
#             input = torch.clone(data[-input_window:])
#             input[-output_window:] = 0     
#             output = eval_model(data[-input_window:])                        
#             data = torch.cat((data, output[-1:]))
            
#     data = data.cpu().view(-1)
    
#     plt.plot(data,color="red")       
#     plt.plot(data[:input_window],color="blue")
#     plt.grid(True, which='both')
#     plt.axhline(y=0, color='k')
#     plt.savefig('graph/transformer-future%d.png'%steps)
#     plt.close()

In [33]:
def evaluate(eval_model, data_source):
    eval_model.eval() # Turn on the evaluation mode
    total_loss = 0.
    eval_batch_size = 1000

    mean_absolute_error = MeanAbsoluteError()
    mean_absolute_percentage_error = MeanAbsolutePercentageError()

    total_mae = 0.
    total_mape = 0.
    
    with torch.no_grad():
        for _, batch in enumerate(val_loader):
            # print(batch["targets"])
            data, targets = batch['inputs'].to(device), batch["targets"].to(device).unsqueeze(1)
            
            # print(data.size())
            output = eval_model(data)
            total_mae += mean_absolute_error(output, targets)
            total_mape += mean_absolute_percentage_error(output, targets)
            # print(output)
            if calculate_loss_over_all_values:
                total_loss += len(data[0])* criterion(output, targets).cpu().item()
            else:                                
                total_loss += len(data[0])* criterion(output[-1:], targets[-1:]).cpu().item()            
    return total_loss / len(val_loader), total_mae / len(val_loader), total_mape / len(val_loader)

In [34]:
# train_data, val_data = get_data()


# criterion = nn.MSELoss()
# lr = 0.005 
# #optimizer = torch.optim.SGD(model.parameters(), lr=lr)
# optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.98)

best_val_loss = float("inf")
epochs = 100 # The number of epochs
best_model = None
prev_loss = np.Inf

train_mae = []
train_mape = []

val_mae = []
val_mape = []

for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    prev_loss, mae_train, mape_train = train_model(train_data, prev_loss)
    
    train_mae.append(mae_train.cpu().detach().numpy())
    train_mape.append(mape_train.cpu().detach().numpy())
    
    # if(epoch % 10 == 0):
        #val_loss = plot_and_loss(model, val_data,epoch)

        # predict_future(model, val_data,200)
        
    # else:
    val_loss, mae_val, mape_val = evaluate(model, val_data)

    val_mae.append(mae_val.cpu().detach().numpy())
    val_mape.append(mape_val.cpu().detach().numpy())
        
    print('-' * 130)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f} | valid MAE {:5.5f} | valid MAPE {:5.5f}'.format(epoch, (time.time() - epoch_start_time),
                                     val_loss, math.exp(val_loss), mae_val, mape_val))
    print('-' * 130)

    # if val_loss < best_val_loss:
    #    best_val_loss = val_loss
    #    best_model = model

    scheduler.step() 

| epoch   1 |    32/  162 batches | lr 0.005000 | 11.18 ms | loss 1.84240 | ppl     6.31 | MAE: 0.16208 | MAPE: 0.34504
| epoch   1 |    64/  162 batches | lr 0.005000 |  9.11 ms | loss 0.01550 | ppl     1.02 | MAE: 0.11635 | MAPE: 0.30846
| epoch   1 |    96/  162 batches | lr 0.005000 | 10.77 ms | loss 0.01190 | ppl     1.01 | MAE: 0.12040 | MAPE: 0.26183
| epoch   1 |   128/  162 batches | lr 0.005000 |  8.79 ms | loss 0.01664 | ppl     1.02 | MAE: 0.21603 | MAPE: 0.4554
| epoch   1 |   160/  162 batches | lr 0.005000 |  8.48 ms | loss 0.01482 | ppl     1.01 | MAE: 0.08665 | MAPE: 0.17414
----------------------------------------------------------------------------------------------------------------------------------
| end of epoch   1 | time:  1.60s | valid loss 1.32961 | valid ppl     3.78 | valid MAE 0.06634 | valid MAPE 0.17875
----------------------------------------------------------------------------------------------------------------------------------
| epoch   2 |    32/  

KeyboardInterrupt: 

In [None]:
model.load_state_dict(torch.load('tranformer_models/state_dict.pt'))

In [None]:
plt.plot(train_mae, label="MAE train")
plt.plot(val_mae, label="MAE validation")
plt.legend()
plt.grid()

In [None]:
plt.plot(train_mape, label="MAPE train")
plt.plot(val_mape, label="MAPE validation")
plt.legend()
plt.grid()

In [None]:
model

In [None]:
test_losses = []
test_preds = []
num_correct = 0
test_real = []
model.eval()
for (combo_data_3) in test_loader:
    inputs, labels = combo_data_3['inputs'], combo_data_3['targets'].unsqueeze(1)
    inputs, labels = inputs.to(device), labels.to(device)
    # output, h = model(inputs, h)
    output = model(inputs)
    # print(output)
    test_preds.append(output.cpu().detach().numpy())
    test_real.append(labels.cpu().detach().numpy())
    test_loss = criterion(output.squeeze(), labels.float())
    test_losses.append(test_loss.item())
    pred = torch.round(output.squeeze()) #rounds the output to 0/1
    correct_tensor = pred.eq(labels.float().view_as(pred))
    correct = np.squeeze(correct_tensor.cpu().numpy())
    num_correct += np.sum(correct)
        
print("Test loss: {:.3f}".format(np.mean(test_losses)))

In [None]:
plt.plot(np.concatenate(test_preds, axis=0), label='prediction')
plt.plot(test_loader.dataset[:]["targets"], label= 'real', alpha =0.5)
plt.legend()
plt.show()

In [None]:
model.eval()
preds = []
with torch.no_grad():
    for batch in test_loader:
        data, targets = batch["inputs"], batch["targets"]

        preds = model(data)
        print(preds)
#   y_val = model(inputCatColumns, inputNumColumns)
#   preds.append(y_val > 0.) # if y_val are logits
#   loss = loss_function(y_val, test_outputs)
#   print(f'Loss: {loss:.8f}')
# preds = torch.stack(preds)

In [40]:
type(model)

__main__.TransAm

In [None]:
# class TimeSeriesDataset(object):
#     def __init__(self, data, categorical_cols, target_col, seq_length, prediction_window=1):
#         '''
#         :param data: dataset of type pandas.DataFrame
#         :param categorical_cols: name of the categorical columns, if None pass empty list
#         :param target_col: name of the targeted column
#         :param seq_length: window length to use
#         :param prediction_window: window length to predict
#         '''
#         self.data = data
#         self.categorical_cols = categorical_cols
#         self.numerical_cols = list(set(data.columns) - set(categorical_cols) - set(target_col))
#         self.target_col = target_col
#         self.seq_length = seq_length
#         self.prediction_window = prediction_window
#         self.preprocessor = None

#     def preprocess_data(self):
#         '''Preprocessing function'''
#         X = self.data.drop(self.target_col, axis=1)
#         y = self.data[self.target_col]

#         self.preprocess = ColumnTransformer(
#             [("scaler", StandardScaler(), self.numerical_cols),
#              ("encoder", OneHotEncoder(), self.categorical_cols)],
#             remainder="passthrough"
#         )

#         X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, shuffle=False)
#         X_train = self.preprocessor.fit_transform(X_train)
#         X_test = self.preprocessor.transform(X_test)

#         if self.target_col:
#             return X_train, X_test, y_train.values, y_test.values
#         return X_train, X_test

#     def frame_series(self, X, y=None):
#         '''
#         Function used to prepare the data for time series prediction
#         :param X: set of features
#         :param y: targeted value to predict
#         :return: TensorDataset
#         '''
#         nb_obs, nb_features = X.shape
#         features, target, y_hist = [], [], []

#         for i in range(1, nb_obs - self.seq_length - self.prediction_window):
#             features.append(torch.FloatTensor(X[i:i + self.seq_length, :]).unsqueeze(0))

#         features_var = torch.cat(features)

#         if y is not None:
#             for i in range(1, nb_obs - self.seq_length - self.prediction_window):
#                 target.append(
#                     torch.tensor(y[i + self.seq_length:i + self.seq_length + self.prediction_window]))
#                 y_hist.append(
#                     torch.tensor(y[i + self.seq_length - 1:i + self.seq_length + self.prediction_window - 1]))
#             target_var, y_hist_var = torch.cat(target), torch.cat(y_hist)
#             return TensorDataset(features_var, target_var, y_hist_var)
#         return TensorDataset(features_var)

#     def get_loaders(self, batch_size: int):
#         '''
#         Preprocess and frame the dataset
#         :param batch_size: batch size
#         :return: DataLoaders associated to training and testing data
#         '''
#         X_train, X_test, y_train, y_test = self.preprocess_data()

#         train_dataset = self.frame_series(X_train, y_train)
#         test_dataset = self.frame_series(X_test, y_test)

#         train_iter = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
#         test_iter = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
#         return train_iter, test_iter

In [None]:
device

In [None]:

# model.eval() # Turn on the evaluation mode
# # total_loss = 0.
# # eval_batch_size = 1000
# preds = []
# # with torch.no_grad():
# preds_batch = []
# for _, batch in enumerate(test_loader):
#     # print(batch["targets"])
#     data, targets = batch['inputs'].to(device), batch["targets"].to(device).unsqueeze(1)
#     print(data, targets)
#     # print(data.size())
#     output = model(data)
#     # preds_batch.append(output)
#     print(output)
#     break
#     # loss = criterion(output, targets).cpu().item()
#     # print(f'Loss: {loss:.8f}')
#     # # if calculate_loss_over_all_values:
#     # #     total_loss += len(data[0])* criterion(output, targets).cpu().item()
#     # # else:                                
#     # #     total_loss += len(data[0])* criterion(output[-1:], targets[-1:]).cpu().item() 
               
# # return total_loss / len(data_source)


In [None]:
# (np.concatenate(preds, axis=0)).shape

In [None]:
# plt.plot()

In [None]:
# model.eval()
# preds = []
# with torch.no_grad():
#   y_val = model(inputCatColumns, inputNumColumns)
#   preds.append(y_val > 0.) # if y_val are logits
#   loss = loss_function(y_val, test_outputs)
#   print(f'Loss: {loss:.8f}')
# preds = torch.stack(preds)

In [37]:
prev_loss

tensor(0.0005, grad_fn=<MseLossBackward0>)

In [39]:
type(mae_train)

torch.Tensor