In [27]:
import reservoirtransformers
import math
import os
import warnings
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
import torch.nn.functional as F

In [34]:
from configuration import ReservoirTConfig

configuration = ReservoirTConfig()

configuration.output_size=8
configuration.re_output_size=2
configuration.max_sequence_length=3399
configuration.sequence_length=334
configuration.pred_len=720
configuration.hidden_size=8
configuration.num_attention_heads=2
#configuration.hidden_dropout_prob=0.0
configuration.num_hidden_layers=4
configuration.num_reservoirs = 10
configuration.intermediate_size=64
configuration.reservoir_size = [30, 15, 20, 25, 30, 35, 40, 45, 50, 50]
configuration.spectral_radius = [0.6, 0.8, 0.55, 0.6, 0.5, 0.4, 0.3, 0.2, 0.81, 0.05]
configuration.sparsity = [0.6, 0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15]
configuration.leaky = [0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39]
#configuration.attention_probs_dropout_prob=0.0
#bert_model = TabularBertForRegression(config=configuration).to("cpu", dtype=float)
model = reservoirtransformers.ReservoirTTimeSeries(config=configuration).to("cpu", dtype=float)

In [35]:
configuration.decoder_dropout = 0.7

In [23]:
import numpy as np
# prepare data for lstm
from sklearn.preprocessing import StandardScaler
from pandas import read_csv
from pandas import DataFrame
import random
from sklearn.model_selection import train_test_split
from pandas import concat
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
dataset= read_csv('exchange_rate.csv')
dataset=dataset.dropna()
dataset = dataset.drop(['date'], axis = 1)
dataset = dataset.dropna()
#data = dataset.values[0:14000]


y = dataset.OT.values


X = dataset.values

scaler = StandardScaler()
X = scaler.fit_transform(X)



#X=X[1:]

#Reservoir_id = np.array([[0] * len(X[0])] + X[:-1].tolist())
# Create a zero column of shape (100, 1)
'''
zero_col = np.zeros((X.shape[0], 1))

# Concatenate the original array with the zero column along the second axis (columns)
X = np.hstack((X, zero_col))
#X =  dataset.drop(['ate'], axis = 1).values

#X_train, X_test, y_train, y_test =train_test_split(X.values, y, test_size=0.2, shuffle=False)
'''

"\nzero_col = np.zeros((X.shape[0], 1))\n\n# Concatenate the original array with the zero column along the second axis (columns)\nX = np.hstack((X, zero_col))\n#X =  dataset.drop(['ate'], axis = 1).values\n\n#X_train, X_test, y_train, y_test =train_test_split(X.values, y, test_size=0.2, shuffle=False)\n"

In [4]:
X.shape

(7588, 8)

In [24]:
from tqdm.auto import tqdm
# 1. Preprocess the data into the required format
def create_sequences(data, seq_length, pred_length):
    sequences = []
    targets = []
    for i in tqdm(range(len(data) - seq_length - pred_length + 1)):
        sequences.append(data[i:i+seq_length])
        targets.append(data[i+seq_length:i+seq_length+pred_length]) #targets.append(data[i+seq_length:i+seq_length+pred_length, -1:])
    return torch.tensor(sequences), torch.tensor(targets)

X, y = create_sequences(X, seq_length=configuration.sequence_length, pred_length=configuration.pred_len)
# Zeros tensor of shape [16941, 384, 1]

zeros = torch.zeros((X.size(0), X.size(1), 4), dtype=X.dtype)

# Concatenate along the last dimension
#X = torch.cat((X, zeros), dim=-1)

  0%|          | 0/6535 [00:00<?, ?it/s]

In [9]:
X.shape, y.shape

(torch.Size([6535, 334, 8]), torch.Size([6535, 720, 8]))

In [25]:

batch=64
indices = np.arange(len(X)) 
barrier = int(len(indices)/batch)*batch
indices = indices[0:barrier]
soft_border = int((configuration.sequence_length/batch))+8

indices = [indices[i:i+batch] for i in range(0, len(indices), batch)]

border1 = int(len(indices)*0.7)
border2 = border1+int(len(indices)*0.1)
border3 = border2+int(len(indices)*0.2)

train_ind = indices[0:border1]
val_ind = indices[border1-soft_border: border2]
test_ind = indices[border2-soft_border: border3]

random.shuffle(train_ind)
random.shuffle(val_ind)
#random.shuffle(test_ind)


X_train = [X[item] for sublist in train_ind for item in sublist]
y_train = [y[item] for sublist in train_ind for item in sublist]

X_val = [X[item] for sublist in val_ind for item in sublist]
y_val = [y[item] for sublist in val_ind for item in sublist]

X_test = [X[item] for sublist in test_ind for item in sublist]
y_test = [y[item] for sublist in test_ind for item in sublist]

#train_indices, test_indices =train_test_split(indices,  test_size=0.2, shuffle=False)
#indices = [item for sublist in indices for item in sublist]

In [11]:
import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, tokenized_inputs,  labels=None, pos=None):
        self.tokenized_inputs = tokenized_inputs
        self.labels = labels
        self.pos = pos
        self.id_list = None
        self.re = None

    def __len__(self):
        return len(self.tokenized_inputs)

    def __getitem__(self, idx):
        if self.labels is not None:
            return {
                "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
                "labels_ids": torch.tensor(self.labels[idx]),
                #"id": torch.tensor(self.id_list[idx]),  # Include the id directly
                #"reservoir_ids": torch.tensor(self.re[idx]),
            }
        else:
            return {
                "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
            }

# Assuming you have X_train, y_train, X_test, y_test, trainpos, and testpos defined


train_dataset = CustomDataset(X_train, y_train)

test_dataset = CustomDataset(X_test, y_test)

val_dataset = CustomDataset(X_val, y_val)

In [29]:
from transformers import Trainer, TrainingArguments
from torch.utils.data import DataLoader
from torch.cuda.amp import GradScaler
class CustomTrainer(Trainer):
    def __init__(self, *args, gradient_accumulation_steps=1, **kwargs):
        super().__init__(*args, **kwargs)
        self.gradient_accumulation_steps = gradient_accumulation_steps
        self.scaler = GradScaler()

    def training_step(self, model, inputs):
        model.train()
        inputs = self._prepare_inputs(inputs)
        loss = self.compute_loss(model, inputs)

        if self.args.n_gpu > 1:
            loss = loss.mean()  # mean() to average on multi-gpu parallel training

        loss = loss / self.gradient_accumulation_steps
        self.scaler.scale(loss).backward()

        return loss.detach()


    def get_train_dataloader(self) -> DataLoader:
        """
        Returns the training [`~torch.utils.data.DataLoader`].
        Will use no sampler if `train_dataset` does not implement `__len__`, a random sampler (adapted to distributed
        training if necessary) otherwise.
        Subclass and override this method if you want to inject some custom behavior.
        """
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")

        train_dataset = self.train_dataset


        loader =  DataLoader(
            train_dataset,
            batch_size=self._train_batch_size,
            drop_last=self.args.dataloader_drop_last,
            shuffle = False,
        )
        return loader



In [36]:
from transformers import BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import mean_squared_error
from transformers import Trainer, TrainingArguments
from transformers import EarlyStoppingCallback, IntervalStrategy
from sklearn.metrics import r2_score, accuracy_score
import numpy as np

def compute_metrics1(p):

    preds = p.predictions.flatten()
    labels = p.label_ids.flatten()

    r2 = r2_score(labels, preds)
    mse = mean_squared_error(labels, preds)
    
    return {"r2_score": r2, "mse": mse}

def compute_metrics_classification(p):
    preds = np.argmax(p.predictions , axis=1)
    labels = p.label_ids
    accuracy = accuracy_score(labels, preds)
    return {"accuracy_score": accuracy}

def compute_metrics(p):
    prediction_scores, labels_ids = p
    #print('here')
    #print(prediction_scores)

    mask = labels_ids != 100
    #print(mask)
    masked_predictions = prediction_scores[mask]
    masked_labels = labels_ids[mask]

    mse = mean_squared_error(masked_predictions, masked_labels)
    return {"mse": mse}

training_args = TrainingArguments(
    output_dir='./results_task1',
    num_train_epochs=150,
    label_names=["labels_ids"],
    disable_tqdm = True,
    #label_names=["labels_mask"],
    do_eval=True,
    learning_rate=0.001,
    per_device_train_batch_size=batch,
    per_device_eval_batch_size=batch,
    logging_dir='./logs',
    logging_strategy="steps",
    logging_steps=50,
    evaluation_strategy="steps",
    eval_steps = 50,
    save_strategy="steps",
    save_steps=50,

    save_total_limit=2,
    load_best_model_at_end=True,
)

trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics1, #compute_metrics1,#compute_metrics_classification,
    callbacks = [EarlyStoppingCallback(early_stopping_patience=10)]
)

trainer.train()

  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),
Could not estimate the number of tokens of the input, floating-point operations will not be computed


{'loss': 0.7848, 'learning_rate': 0.000995305164319249, 'epoch': 0.7}
{'eval_loss': 1.0681026708843784, 'eval_r2_score': -0.6167644735685114, 'eval_mse': 1.0681026708843786, 'eval_runtime': 5.3413, 'eval_samples_per_second': 275.59, 'eval_steps_per_second': 4.306, 'epoch': 0.7}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.7294, 'learning_rate': 0.0009906103286384976, 'epoch': 1.41}
{'eval_loss': 1.021077485829969, 'eval_r2_score': -0.5455834432879654, 'eval_mse': 1.0210774858299698, 'eval_runtime': 5.3716, 'eval_samples_per_second': 274.034, 'eval_steps_per_second': 4.282, 'epoch': 1.41}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.6365, 'learning_rate': 0.0009859154929577464, 'epoch': 2.11}
{'eval_loss': 0.8883988983308444, 'eval_r2_score': -0.3447506652047261, 'eval_mse': 0.8883988983308443, 'eval_runtime': 5.3993, 'eval_samples_per_second': 272.628, 'eval_steps_per_second': 4.26, 'epoch': 2.11}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.5403, 'learning_rate': 0.0009812206572769953, 'epoch': 2.82}
{'eval_loss': 0.9751319269644539, 'eval_r2_score': -0.47603662038702566, 'eval_mse': 0.9751319269644542, 'eval_runtime': 5.3532, 'eval_samples_per_second': 274.978, 'eval_steps_per_second': 4.297, 'epoch': 2.82}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.5537, 'learning_rate': 0.0009765258215962442, 'epoch': 3.52}
{'eval_loss': 0.5976069859436105, 'eval_r2_score': 0.095414916217754, 'eval_mse': 0.5976069859436106, 'eval_runtime': 5.3598, 'eval_samples_per_second': 274.637, 'eval_steps_per_second': 4.291, 'epoch': 3.52}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.5371, 'learning_rate': 0.000971830985915493, 'epoch': 4.23}
{'eval_loss': 0.9211156506897389, 'eval_r2_score': -0.3942733228539239, 'eval_mse': 0.9211156506897396, 'eval_runtime': 5.3537, 'eval_samples_per_second': 274.951, 'eval_steps_per_second': 4.296, 'epoch': 4.23}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.48, 'learning_rate': 0.0009671361502347418, 'epoch': 4.93}
{'eval_loss': 0.7441629548959987, 'eval_r2_score': -0.12642376132758004, 'eval_mse': 0.744162954895998, 'eval_runtime': 5.3612, 'eval_samples_per_second': 274.568, 'eval_steps_per_second': 4.29, 'epoch': 4.93}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.4567, 'learning_rate': 0.0009624413145539907, 'epoch': 5.63}
{'eval_loss': 0.6773712899931938, 'eval_r2_score': -0.0253226276173395, 'eval_mse': 0.6773712899931938, 'eval_runtime': 5.3475, 'eval_samples_per_second': 275.268, 'eval_steps_per_second': 4.301, 'epoch': 5.63}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.515, 'learning_rate': 0.0009577464788732394, 'epoch': 6.34}
{'eval_loss': 0.5658575111281522, 'eval_r2_score': 0.14347342626116522, 'eval_mse': 0.5658575111281522, 'eval_runtime': 5.3636, 'eval_samples_per_second': 274.444, 'eval_steps_per_second': 4.288, 'epoch': 6.34}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.4489, 'learning_rate': 0.0009530516431924883, 'epoch': 7.04}
{'eval_loss': 0.673077564581978, 'eval_r2_score': -0.018823306069565726, 'eval_mse': 0.6730775645819771, 'eval_runtime': 5.371, 'eval_samples_per_second': 274.063, 'eval_steps_per_second': 4.282, 'epoch': 7.04}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.4157, 'learning_rate': 0.0009483568075117372, 'epoch': 7.75}
{'eval_loss': 0.7054235427920029, 'eval_r2_score': -0.0677847307137811, 'eval_mse': 0.7054235427920027, 'eval_runtime': 5.3484, 'eval_samples_per_second': 275.221, 'eval_steps_per_second': 4.3, 'epoch': 7.75}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.4331, 'learning_rate': 0.0009436619718309858, 'epoch': 8.45}
{'eval_loss': 0.6522408168986042, 'eval_r2_score': 0.012716839196893481, 'eval_mse': 0.6522408168986054, 'eval_runtime': 5.4361, 'eval_samples_per_second': 270.781, 'eval_steps_per_second': 4.231, 'epoch': 8.45}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.4218, 'learning_rate': 0.0009389671361502347, 'epoch': 9.15}
{'eval_loss': 0.5638659088651904, 'eval_r2_score': 0.14648807257936092, 'eval_mse': 0.5638659088651896, 'eval_runtime': 5.6063, 'eval_samples_per_second': 262.56, 'eval_steps_per_second': 4.102, 'epoch': 9.15}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3901, 'learning_rate': 0.0009342723004694836, 'epoch': 9.86}
{'eval_loss': 1.1308335948592187, 'eval_r2_score': -0.7117189494267835, 'eval_mse': 1.1308335948592174, 'eval_runtime': 5.4495, 'eval_samples_per_second': 270.115, 'eval_steps_per_second': 4.221, 'epoch': 9.86}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3856, 'learning_rate': 0.0009295774647887324, 'epoch': 10.56}
{'eval_loss': 0.6612995787723908, 'eval_r2_score': -0.0009952174913718803, 'eval_mse': 0.6612995787723914, 'eval_runtime': 5.4526, 'eval_samples_per_second': 269.962, 'eval_steps_per_second': 4.218, 'epoch': 10.56}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3946, 'learning_rate': 0.0009248826291079812, 'epoch': 11.27}
{'eval_loss': 0.820601674612311, 'eval_r2_score': -0.24212743833464945, 'eval_mse': 0.8206016746123102, 'eval_runtime': 5.4217, 'eval_samples_per_second': 271.502, 'eval_steps_per_second': 4.242, 'epoch': 11.27}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3523, 'learning_rate': 0.00092018779342723, 'epoch': 11.97}
{'eval_loss': 0.7283873841131674, 'eval_r2_score': -0.10254461273334692, 'eval_mse': 0.7283873841131677, 'eval_runtime': 5.4317, 'eval_samples_per_second': 271.004, 'eval_steps_per_second': 4.234, 'epoch': 11.97}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3463, 'learning_rate': 0.0009154929577464789, 'epoch': 12.68}
{'eval_loss': 0.5877414968851542, 'eval_r2_score': 0.11034809882171226, 'eval_mse': 0.5877414968851538, 'eval_runtime': 5.4427, 'eval_samples_per_second': 270.455, 'eval_steps_per_second': 4.226, 'epoch': 12.68}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3594, 'learning_rate': 0.0009107981220657277, 'epoch': 13.38}
{'eval_loss': 0.6466713695490834, 'eval_r2_score': 0.02114719412210131, 'eval_mse': 0.6466713695490832, 'eval_runtime': 5.4125, 'eval_samples_per_second': 271.964, 'eval_steps_per_second': 4.249, 'epoch': 13.38}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3287, 'learning_rate': 0.0009061032863849765, 'epoch': 14.08}
{'eval_loss': 0.6029598075515977, 'eval_r2_score': 0.08731246310622043, 'eval_mse': 0.6029598075515984, 'eval_runtime': 5.4547, 'eval_samples_per_second': 269.858, 'eval_steps_per_second': 4.217, 'epoch': 14.08}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3129, 'learning_rate': 0.0009014084507042254, 'epoch': 14.79}
{'eval_loss': 0.7119927239623591, 'eval_r2_score': -0.07772836162697017, 'eval_mse': 0.7119927239623581, 'eval_runtime': 5.3691, 'eval_samples_per_second': 274.16, 'eval_steps_per_second': 4.284, 'epoch': 14.79}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3159, 'learning_rate': 0.0008967136150234741, 'epoch': 15.49}
{'eval_loss': 0.6086954390134597, 'eval_r2_score': 0.07863055879701908, 'eval_mse': 0.6086954390134606, 'eval_runtime': 5.4667, 'eval_samples_per_second': 269.268, 'eval_steps_per_second': 4.207, 'epoch': 15.49}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3019, 'learning_rate': 0.000892018779342723, 'epoch': 16.2}
{'eval_loss': 0.6121429928242009, 'eval_r2_score': 0.07341206934477906, 'eval_mse': 0.6121429928242001, 'eval_runtime': 5.4095, 'eval_samples_per_second': 272.112, 'eval_steps_per_second': 4.252, 'epoch': 16.2}
{'train_runtime': 543.6484, 'train_samples_per_second': 1253.751, 'train_steps_per_second': 19.59, 'train_loss': 0.4539470515043839, 'epoch': 16.2}


TrainOutput(global_step=1150, training_loss=0.4539470515043839, metrics={'train_runtime': 543.6484, 'train_samples_per_second': 1253.751, 'train_steps_per_second': 19.59, 'train_loss': 0.4539470515043839, 'epoch': 16.2})

In [39]:
model = reservoirtransformers.ReservoirTTimeSeries.from_pretrained("results_task1/checkpoint-650", config=configuration).to("cuda", dtype=float)


In [40]:
cnt = 0
ln = len(X_test)
y_pred = []
y_test1 = []
while cnt < ln:
    #print(cnt, ln)
    input_ids = torch.stack(X_test[cnt:cnt+batch], dim=0)
    #y_test1 = y_test1 + [k.detach().numpy().flatten() for k in y_test[cnt:cnt+64]]
    
    output = model(inputs_embeds = input_ids.to(model.device))['logits']
    y_pred = y_pred + list(output.cpu().detach().numpy().reshape(output.size(0), -1))
    #y_test = y_test + labels_ids
    
    cnt=cnt+batch


In [41]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
y_test1 = [i.detach().numpy().flatten() for i in y_test]

mse = mean_squared_error(y_test1, y_pred)
mse

0.5869734883425769