In [1]:
import reservoirtransformers
import math
import os
import warnings
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
import torch.nn.functional as F

In [2]:
from configuration import ReservoirTConfig

configuration = ReservoirTConfig()

configuration.output_size=21
configuration.re_output_size=42
configuration.max_sequence_length=3399
configuration.sequence_length=250
configuration.pred_len=24
configuration.hidden_size=21
configuration.num_attention_heads=7
configuration.hidden_dropout_prob=0.0
configuration.num_hidden_layers=16
configuration.num_reservoirs = 10
configuration.intermediate_size=128
configuration.reservoir_size = [30, 15, 20, 25, 30, 35, 40, 45, 50, 50]
configuration.spectral_radius = [0.6, 0.8, 0.55, 0.6, 0.5, 0.4, 0.3, 0.2, 0.81, 0.05]
configuration.sparsity = [0.6, 0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15]
configuration.leaky = [0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39]
configuration.attention_probs_dropout_prob=0.0
#bert_model = TabularBertForRegression(config=configuration).to("cpu", dtype=float)
model = reservoirtransformers.ReservoirTTimeSeries(config=configuration).to("cpu", dtype=float)


In [3]:
import numpy as np
# prepare data for lstm
from sklearn.preprocessing import StandardScaler
from pandas import read_csv
from pandas import DataFrame
import random
from sklearn.model_selection import train_test_split
from pandas import concat
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
dataset= read_csv('weather.csv')
dataset=dataset.dropna()
dataset = dataset.drop(['date'], axis = 1)
dataset = dataset.dropna()
#data = dataset.values[0:14000]


y = dataset.OT.values


X = dataset.values

scaler = StandardScaler()
X = scaler.fit_transform(X)

X = X[0:3000]

#X=X[1:]

#Reservoir_id = np.array([[0] * len(X[0])] + X[:-1].tolist())
# Create a zero column of shape (100, 1)
'''
zero_col = np.zeros((X.shape[0], 1))

# Concatenate the original array with the zero column along the second axis (columns)
X = np.hstack((X, zero_col))
#X =  dataset.drop(['ate'], axis = 1).values

#X_train, X_test, y_train, y_test =train_test_split(X.values, y, test_size=0.2, shuffle=False)
'''

"\nzero_col = np.zeros((X.shape[0], 1))\n\n# Concatenate the original array with the zero column along the second axis (columns)\nX = np.hstack((X, zero_col))\n#X =  dataset.drop(['ate'], axis = 1).values\n\n#X_train, X_test, y_train, y_test =train_test_split(X.values, y, test_size=0.2, shuffle=False)\n"

In [4]:
X.shape

(3000, 21)

In [5]:
from tqdm.auto import tqdm
# 1. Preprocess the data into the required format
def create_sequences(data, seq_length, pred_length):
    sequences = []
    targets = []
    for i in tqdm(range(len(data) - seq_length - pred_length + 1)):
        sequences.append(data[i:i+seq_length])
        targets.append(data[i+seq_length:i+seq_length+pred_length])
    return torch.tensor(sequences), torch.tensor(targets)

X, y = create_sequences(X, seq_length=configuration.sequence_length, pred_length=configuration.pred_len)
# Zeros tensor of shape [16941, 384, 1]

zeros = torch.zeros((X.size(0), X.size(1), 3), dtype=X.dtype)

# Concatenate along the last dimension
#X = torch.cat((X, zeros), dim=-1)

  0%|          | 0/2727 [00:00<?, ?it/s]

  return torch.tensor(sequences), torch.tensor(targets)


In [6]:

batch=64
indices = np.arange(len(X)) 
barrier = int(len(indices)/batch)*batch
indices = indices[0:barrier]
soft_border = int((configuration.sequence_length/batch))+8

indices = [indices[i:i+batch] for i in range(0, len(indices), batch)]

border1 = int(len(indices)*0.7)
border2 = border1+int(len(indices)*0.1)
border3 = border2+int(len(indices)*0.2)

train_ind = indices[0:border1]
val_ind = indices[border1-soft_border: border2]
test_ind = indices[border2-soft_border: border3]

random.shuffle(train_ind)
random.shuffle(val_ind)
#random.shuffle(test_ind)


X_train = [X[item] for sublist in train_ind for item in sublist]
y_train = [y[item] for sublist in train_ind for item in sublist]

X_val = [X[item] for sublist in val_ind for item in sublist]
y_val = [y[item] for sublist in val_ind for item in sublist]

X_test = [X[item] for sublist in test_ind for item in sublist]
y_test = [y[item] for sublist in test_ind for item in sublist]

#train_indices, test_indices =train_test_split(indices,  test_size=0.2, shuffle=False)
#indices = [item for sublist in indices for item in sublist]

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, tokenized_inputs,  labels=None, pos=None):
        self.tokenized_inputs = tokenized_inputs
        self.labels = labels
        self.pos = pos
        self.id_list = None
        self.re = None

    def __len__(self):
        return len(self.tokenized_inputs)

    def __getitem__(self, idx):
        if self.labels is not None:
            return {
                "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
                "labels_ids": torch.tensor(self.labels[idx]),
                #"id": torch.tensor(self.id_list[idx]),  # Include the id directly
                #"reservoir_ids": torch.tensor(self.re[idx]),
            }
        else:
            return {
                "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
            }

# Assuming you have X_train, y_train, X_test, y_test, trainpos, and testpos defined


train_dataset = CustomDataset(X_train, y_train)

test_dataset = CustomDataset(X_test, y_test)

val_dataset = CustomDataset(X_val, y_val)

In [8]:
from transformers import Trainer, TrainingArguments
from torch.utils.data import DataLoader
from torch.cuda.amp import GradScaler
class CustomTrainer(Trainer):
    def __init__(self, *args, gradient_accumulation_steps=1, **kwargs):
        super().__init__(*args, **kwargs)
        self.gradient_accumulation_steps = gradient_accumulation_steps
        self.scaler = GradScaler()

    def training_step(self, model, inputs):
        model.train()
        inputs = self._prepare_inputs(inputs)
        loss = self.compute_loss(model, inputs)

        if self.args.n_gpu > 1:
            loss = loss.mean()  # mean() to average on multi-gpu parallel training

        loss = loss / self.gradient_accumulation_steps
        self.scaler.scale(loss).backward()

        return loss.detach()


    def get_train_dataloader(self) -> DataLoader:
        """
        Returns the training [`~torch.utils.data.DataLoader`].
        Will use no sampler if `train_dataset` does not implement `__len__`, a random sampler (adapted to distributed
        training if necessary) otherwise.
        Subclass and override this method if you want to inject some custom behavior.
        """
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")

        train_dataset = self.train_dataset


        loader =  DataLoader(
            train_dataset,
            batch_size=self._train_batch_size,
            drop_last=self.args.dataloader_drop_last,
            shuffle = False,
        )
        return loader

#bert_model = TabularBertForSequenceClassification(config=configuration).to("cpu", dtype=float)


In [9]:
from transformers import BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import mean_squared_error
from transformers import Trainer, TrainingArguments
from transformers import EarlyStoppingCallback, IntervalStrategy
from sklearn.metrics import r2_score, accuracy_score
import numpy as np

def compute_metrics1(p):

    preds = p.predictions.flatten()
    labels = p.label_ids.flatten()

    r2 = r2_score(labels, preds)
    mse = mean_squared_error(labels, preds)
    
    return {"r2_score": r2, "mse": mse}

def compute_metrics_classification(p):
    preds = np.argmax(p.predictions , axis=1)
    labels = p.label_ids
    accuracy = accuracy_score(labels, preds)
    return {"accuracy_score": accuracy}

def compute_metrics(p):
    prediction_scores, labels_ids = p
    #print('here')
    #print(prediction_scores)

    mask = labels_ids != 100
    #print(mask)
    masked_predictions = prediction_scores[mask]
    masked_labels = labels_ids[mask]

    mse = mean_squared_error(masked_predictions, masked_labels)
    return {"mse": mse}

training_args = TrainingArguments(
    output_dir='./results_task1',
    num_train_epochs=150,
    label_names=["labels_ids"],
    disable_tqdm = True,
    #label_names=["labels_mask"],
    do_eval=True,
    #learning_rate=0.001,
    per_device_train_batch_size=batch,
    per_device_eval_batch_size=batch,
    logging_dir='./logs',
    logging_strategy="steps",
    logging_steps=50,
    evaluation_strategy="steps",
    eval_steps = 50,
    save_strategy="steps",
    save_steps=50,

    save_total_limit=2,
    load_best_model_at_end=True,
)

trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics1, #compute_metrics1,#compute_metrics_classification,
    callbacks = [EarlyStoppingCallback(early_stopping_patience=10)]
)

trainer.train()

  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),
Could not estimate the number of tokens of the input, floating-point operations will not be computed


{'loss': 0.5227, 'learning_rate': 4.9425287356321845e-05, 'epoch': 1.72}
{'eval_loss': 0.359530381950005, 'eval_r2_score': 0.05151118490964757, 'eval_mse': 0.359530381950005, 'eval_runtime': 7.7084, 'eval_samples_per_second': 124.54, 'eval_steps_per_second': 1.946, 'epoch': 1.72}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.417, 'learning_rate': 4.885057471264368e-05, 'epoch': 3.45}
{'eval_loss': 0.2923228439107427, 'eval_r2_score': 0.22881358081360137, 'eval_mse': 0.29232284391074287, 'eval_runtime': 7.6507, 'eval_samples_per_second': 125.479, 'eval_steps_per_second': 1.961, 'epoch': 3.45}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3627, 'learning_rate': 4.827586206896552e-05, 'epoch': 5.17}
{'eval_loss': 0.25363473020290417, 'eval_r2_score': 0.33087795414918086, 'eval_mse': 0.2536347302029041, 'eval_runtime': 7.6455, 'eval_samples_per_second': 125.565, 'eval_steps_per_second': 1.962, 'epoch': 5.17}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.3005, 'learning_rate': 4.770114942528736e-05, 'epoch': 6.9}
{'eval_loss': 0.2303514656930084, 'eval_r2_score': 0.3923022928841954, 'eval_mse': 0.23035146569300838, 'eval_runtime': 7.4759, 'eval_samples_per_second': 128.413, 'eval_steps_per_second': 2.006, 'epoch': 6.9}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2897, 'learning_rate': 4.7126436781609195e-05, 'epoch': 8.62}
{'eval_loss': 0.21152361654285362, 'eval_r2_score': 0.44197265518924667, 'eval_mse': 0.2115236165428536, 'eval_runtime': 7.4716, 'eval_samples_per_second': 128.486, 'eval_steps_per_second': 2.008, 'epoch': 8.62}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2623, 'learning_rate': 4.655172413793104e-05, 'epoch': 10.34}
{'eval_loss': 0.2013374237162243, 'eval_r2_score': 0.4688451823787708, 'eval_mse': 0.20133742371622426, 'eval_runtime': 7.6268, 'eval_samples_per_second': 125.872, 'eval_steps_per_second': 1.967, 'epoch': 10.34}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2585, 'learning_rate': 4.597701149425287e-05, 'epoch': 12.07}
{'eval_loss': 0.19527750536215255, 'eval_r2_score': 0.48483205043710664, 'eval_mse': 0.1952775053621525, 'eval_runtime': 7.5039, 'eval_samples_per_second': 127.933, 'eval_steps_per_second': 1.999, 'epoch': 12.07}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2419, 'learning_rate': 4.5402298850574716e-05, 'epoch': 13.79}
{'eval_loss': 0.18762061637251856, 'eval_r2_score': 0.5050319387627229, 'eval_mse': 0.18762061637251856, 'eval_runtime': 7.547, 'eval_samples_per_second': 127.202, 'eval_steps_per_second': 1.988, 'epoch': 13.79}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2546, 'learning_rate': 4.482758620689655e-05, 'epoch': 15.52}
{'eval_loss': 0.17949992462025208, 'eval_r2_score': 0.5264554002683832, 'eval_mse': 0.179499924620252, 'eval_runtime': 7.637, 'eval_samples_per_second': 125.703, 'eval_steps_per_second': 1.964, 'epoch': 15.52}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2278, 'learning_rate': 4.4252873563218394e-05, 'epoch': 17.24}
{'eval_loss': 0.18060288881558015, 'eval_r2_score': 0.5235456344871441, 'eval_mse': 0.18060288881558023, 'eval_runtime': 7.652, 'eval_samples_per_second': 125.457, 'eval_steps_per_second': 1.96, 'epoch': 17.24}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2341, 'learning_rate': 4.367816091954024e-05, 'epoch': 18.97}
{'eval_loss': 0.179231173092176, 'eval_r2_score': 0.527164402431249, 'eval_mse': 0.179231173092176, 'eval_runtime': 7.5943, 'eval_samples_per_second': 126.411, 'eval_steps_per_second': 1.975, 'epoch': 18.97}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.23, 'learning_rate': 4.3103448275862066e-05, 'epoch': 20.69}
{'eval_loss': 0.17262003764195075, 'eval_r2_score': 0.5446054542710848, 'eval_mse': 0.17262003764195075, 'eval_runtime': 7.6278, 'eval_samples_per_second': 125.856, 'eval_steps_per_second': 1.966, 'epoch': 20.69}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2312, 'learning_rate': 4.252873563218391e-05, 'epoch': 22.41}
{'eval_loss': 0.16991858173899974, 'eval_r2_score': 0.5517322531093678, 'eval_mse': 0.16991858173899982, 'eval_runtime': 7.4554, 'eval_samples_per_second': 128.766, 'eval_steps_per_second': 2.012, 'epoch': 22.41}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2388, 'learning_rate': 4.195402298850575e-05, 'epoch': 24.14}
{'eval_loss': 0.17119650418281943, 'eval_r2_score': 0.5483609242721726, 'eval_mse': 0.17119650418281945, 'eval_runtime': 7.5491, 'eval_samples_per_second': 127.167, 'eval_steps_per_second': 1.987, 'epoch': 24.14}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2157, 'learning_rate': 4.1379310344827587e-05, 'epoch': 25.86}
{'eval_loss': 0.16806787209914115, 'eval_r2_score': 0.556614670511383, 'eval_mse': 0.16806787209914112, 'eval_runtime': 7.485, 'eval_samples_per_second': 128.256, 'eval_steps_per_second': 2.004, 'epoch': 25.86}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.23, 'learning_rate': 4.080459770114943e-05, 'epoch': 27.59}
{'eval_loss': 0.16299569662669627, 'eval_r2_score': 0.5699957418903758, 'eval_mse': 0.1629956966266963, 'eval_runtime': 7.4761, 'eval_samples_per_second': 128.41, 'eval_steps_per_second': 2.006, 'epoch': 27.59}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2167, 'learning_rate': 4.0229885057471265e-05, 'epoch': 29.31}
{'eval_loss': 0.1631501404939719, 'eval_r2_score': 0.5695882984919189, 'eval_mse': 0.16315014049397186, 'eval_runtime': 7.4802, 'eval_samples_per_second': 128.339, 'eval_steps_per_second': 2.005, 'epoch': 29.31}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2222, 'learning_rate': 3.965517241379311e-05, 'epoch': 31.03}
{'eval_loss': 0.16290756187819339, 'eval_r2_score': 0.5702282530420686, 'eval_mse': 0.16290756187819344, 'eval_runtime': 7.5237, 'eval_samples_per_second': 127.597, 'eval_steps_per_second': 1.994, 'epoch': 31.03}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2028, 'learning_rate': 3.908045977011495e-05, 'epoch': 32.76}
{'eval_loss': 0.15889152155229036, 'eval_r2_score': 0.5808230998792422, 'eval_mse': 0.15889152155229042, 'eval_runtime': 7.3517, 'eval_samples_per_second': 130.583, 'eval_steps_per_second': 2.04, 'epoch': 32.76}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2316, 'learning_rate': 3.850574712643678e-05, 'epoch': 34.48}
{'eval_loss': 0.15508663489958308, 'eval_r2_score': 0.590860895331203, 'eval_mse': 0.15508663489958308, 'eval_runtime': 7.3336, 'eval_samples_per_second': 130.905, 'eval_steps_per_second': 2.045, 'epoch': 34.48}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2023, 'learning_rate': 3.793103448275862e-05, 'epoch': 36.21}
{'eval_loss': 0.15575982449908404, 'eval_r2_score': 0.5890849319144293, 'eval_mse': 0.15575982449908407, 'eval_runtime': 7.363, 'eval_samples_per_second': 130.381, 'eval_steps_per_second': 2.037, 'epoch': 36.21}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2001, 'learning_rate': 3.735632183908046e-05, 'epoch': 37.93}
{'eval_loss': 0.1537482771921984, 'eval_r2_score': 0.5943916604063528, 'eval_mse': 0.15374827719219838, 'eval_runtime': 7.3267, 'eval_samples_per_second': 131.028, 'eval_steps_per_second': 2.047, 'epoch': 37.93}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2085, 'learning_rate': 3.67816091954023e-05, 'epoch': 39.66}
{'eval_loss': 0.14860429184520996, 'eval_r2_score': 0.6079621757551381, 'eval_mse': 0.14860429184520998, 'eval_runtime': 7.3581, 'eval_samples_per_second': 130.469, 'eval_steps_per_second': 2.039, 'epoch': 39.66}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2004, 'learning_rate': 3.620689655172414e-05, 'epoch': 41.38}
{'eval_loss': 0.1470362968112192, 'eval_r2_score': 0.6120987545438099, 'eval_mse': 0.14703629681121916, 'eval_runtime': 7.3477, 'eval_samples_per_second': 130.653, 'eval_steps_per_second': 2.041, 'epoch': 41.38}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.203, 'learning_rate': 3.563218390804598e-05, 'epoch': 43.1}
{'eval_loss': 0.14719034540912415, 'eval_r2_score': 0.6116923539183586, 'eval_mse': 0.14719034540912415, 'eval_runtime': 7.7024, 'eval_samples_per_second': 124.637, 'eval_steps_per_second': 1.947, 'epoch': 43.1}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1853, 'learning_rate': 3.505747126436782e-05, 'epoch': 44.83}
{'eval_loss': 0.14346251239972357, 'eval_r2_score': 0.6215268716432967, 'eval_mse': 0.14346251239972357, 'eval_runtime': 7.7074, 'eval_samples_per_second': 124.556, 'eval_steps_per_second': 1.946, 'epoch': 44.83}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.2035, 'learning_rate': 3.4482758620689657e-05, 'epoch': 46.55}
{'eval_loss': 0.1405539852584838, 'eval_r2_score': 0.6291999518622482, 'eval_mse': 0.14055398525848378, 'eval_runtime': 7.7472, 'eval_samples_per_second': 123.915, 'eval_steps_per_second': 1.936, 'epoch': 46.55}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1824, 'learning_rate': 3.390804597701149e-05, 'epoch': 48.28}
{'eval_loss': 0.1405279550586237, 'eval_r2_score': 0.6292686229806331, 'eval_mse': 0.14052795505862367, 'eval_runtime': 7.507, 'eval_samples_per_second': 127.881, 'eval_steps_per_second': 1.998, 'epoch': 48.28}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1892, 'learning_rate': 3.3333333333333335e-05, 'epoch': 50.0}
{'eval_loss': 0.13988662189473305, 'eval_r2_score': 0.6309605448966545, 'eval_mse': 0.13988662189473305, 'eval_runtime': 7.5213, 'eval_samples_per_second': 127.638, 'eval_steps_per_second': 1.994, 'epoch': 50.0}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1751, 'learning_rate': 3.275862068965517e-05, 'epoch': 51.72}
{'eval_loss': 0.13682656793251383, 'eval_r2_score': 0.6390333729591839, 'eval_mse': 0.1368265679325138, 'eval_runtime': 7.5062, 'eval_samples_per_second': 127.894, 'eval_steps_per_second': 1.998, 'epoch': 51.72}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1817, 'learning_rate': 3.218390804597701e-05, 'epoch': 53.45}
{'eval_loss': 0.1353438677334549, 'eval_r2_score': 0.6429449326646866, 'eval_mse': 0.13534386773345486, 'eval_runtime': 7.4775, 'eval_samples_per_second': 128.385, 'eval_steps_per_second': 2.006, 'epoch': 53.45}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1899, 'learning_rate': 3.160919540229885e-05, 'epoch': 55.17}
{'eval_loss': 0.13504430091199518, 'eval_r2_score': 0.6437352296570731, 'eval_mse': 0.1350443009119952, 'eval_runtime': 7.8237, 'eval_samples_per_second': 122.703, 'eval_steps_per_second': 1.917, 'epoch': 55.17}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1699, 'learning_rate': 3.103448275862069e-05, 'epoch': 56.9}
{'eval_loss': 0.13350966111261325, 'eval_r2_score': 0.6477838129145199, 'eval_mse': 0.13350966111261328, 'eval_runtime': 7.5241, 'eval_samples_per_second': 127.589, 'eval_steps_per_second': 1.994, 'epoch': 56.9}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1797, 'learning_rate': 3.045977011494253e-05, 'epoch': 58.62}
{'eval_loss': 0.13114497252480295, 'eval_r2_score': 0.6540221749259445, 'eval_mse': 0.13114497252480295, 'eval_runtime': 7.4921, 'eval_samples_per_second': 128.134, 'eval_steps_per_second': 2.002, 'epoch': 58.62}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1693, 'learning_rate': 2.988505747126437e-05, 'epoch': 60.34}
{'eval_loss': 0.1309775447982499, 'eval_r2_score': 0.6544638714666102, 'eval_mse': 0.13097754479824994, 'eval_runtime': 7.5044, 'eval_samples_per_second': 127.925, 'eval_steps_per_second': 1.999, 'epoch': 60.34}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1734, 'learning_rate': 2.9310344827586206e-05, 'epoch': 62.07}
{'eval_loss': 0.13115945425993927, 'eval_r2_score': 0.6539839701886274, 'eval_mse': 0.13115945425993927, 'eval_runtime': 7.5253, 'eval_samples_per_second': 127.569, 'eval_steps_per_second': 1.993, 'epoch': 62.07}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1675, 'learning_rate': 2.8735632183908045e-05, 'epoch': 63.79}
{'eval_loss': 0.12920872009143275, 'eval_r2_score': 0.6591302655587368, 'eval_mse': 0.1292087200914328, 'eval_runtime': 7.3596, 'eval_samples_per_second': 130.441, 'eval_steps_per_second': 2.038, 'epoch': 63.79}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1796, 'learning_rate': 2.8160919540229884e-05, 'epoch': 65.52}
{'eval_loss': 0.1281099471961827, 'eval_r2_score': 0.6620289741346759, 'eval_mse': 0.12810994719618274, 'eval_runtime': 7.3522, 'eval_samples_per_second': 130.572, 'eval_steps_per_second': 2.04, 'epoch': 65.52}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1607, 'learning_rate': 2.7586206896551727e-05, 'epoch': 67.24}
{'eval_loss': 0.1276013189187478, 'eval_r2_score': 0.663370802965856, 'eval_mse': 0.1276013189187478, 'eval_runtime': 7.3656, 'eval_samples_per_second': 130.336, 'eval_steps_per_second': 2.036, 'epoch': 67.24}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1596, 'learning_rate': 2.7011494252873566e-05, 'epoch': 68.97}
{'eval_loss': 0.12822907482204843, 'eval_r2_score': 0.6617146996633807, 'eval_mse': 0.12822907482204846, 'eval_runtime': 7.3471, 'eval_samples_per_second': 130.663, 'eval_steps_per_second': 2.042, 'epoch': 68.97}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1663, 'learning_rate': 2.6436781609195405e-05, 'epoch': 70.69}
{'eval_loss': 0.1260619466170516, 'eval_r2_score': 0.6674318711918557, 'eval_mse': 0.12606194661705164, 'eval_runtime': 7.3416, 'eval_samples_per_second': 130.762, 'eval_steps_per_second': 2.043, 'epoch': 70.69}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1615, 'learning_rate': 2.5862068965517244e-05, 'epoch': 72.41}
{'eval_loss': 0.12588910570113232, 'eval_r2_score': 0.6678878484437649, 'eval_mse': 0.12588910570113235, 'eval_runtime': 7.3515, 'eval_samples_per_second': 130.586, 'eval_steps_per_second': 2.04, 'epoch': 72.41}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.173, 'learning_rate': 2.5287356321839083e-05, 'epoch': 74.14}
{'eval_loss': 0.12512743399848247, 'eval_r2_score': 0.6698972393798408, 'eval_mse': 0.12512743399848247, 'eval_runtime': 7.3305, 'eval_samples_per_second': 130.96, 'eval_steps_per_second': 2.046, 'epoch': 74.14}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1546, 'learning_rate': 2.4712643678160922e-05, 'epoch': 75.86}
{'eval_loss': 0.12432201367035756, 'eval_r2_score': 0.6720220449902304, 'eval_mse': 0.1243220136703575, 'eval_runtime': 7.3935, 'eval_samples_per_second': 129.844, 'eval_steps_per_second': 2.029, 'epoch': 75.86}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.164, 'learning_rate': 2.413793103448276e-05, 'epoch': 77.59}
{'eval_loss': 0.12331389424978084, 'eval_r2_score': 0.6746815976808993, 'eval_mse': 0.12331389424978086, 'eval_runtime': 7.3475, 'eval_samples_per_second': 130.657, 'eval_steps_per_second': 2.042, 'epoch': 77.59}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.154, 'learning_rate': 2.3563218390804597e-05, 'epoch': 79.31}
{'eval_loss': 0.12269295900310775, 'eval_r2_score': 0.67631970718689, 'eval_mse': 0.12269295900310777, 'eval_runtime': 7.3427, 'eval_samples_per_second': 130.741, 'eval_steps_per_second': 2.043, 'epoch': 79.31}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1587, 'learning_rate': 2.2988505747126437e-05, 'epoch': 81.03}
{'eval_loss': 0.12318202862677095, 'eval_r2_score': 0.6750294766774987, 'eval_mse': 0.12318202862677102, 'eval_runtime': 7.3449, 'eval_samples_per_second': 130.704, 'eval_steps_per_second': 2.042, 'epoch': 81.03}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1512, 'learning_rate': 2.2413793103448276e-05, 'epoch': 82.76}
{'eval_loss': 0.12242354585944294, 'eval_r2_score': 0.6770304547793979, 'eval_mse': 0.12242354585944293, 'eval_runtime': 7.4272, 'eval_samples_per_second': 129.255, 'eval_steps_per_second': 2.02, 'epoch': 82.76}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.17, 'learning_rate': 2.183908045977012e-05, 'epoch': 84.48}
{'eval_loss': 0.12197153544685803, 'eval_r2_score': 0.6782229181765547, 'eval_mse': 0.12197153544685806, 'eval_runtime': 7.5045, 'eval_samples_per_second': 127.924, 'eval_steps_per_second': 1.999, 'epoch': 84.48}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1503, 'learning_rate': 2.1264367816091954e-05, 'epoch': 86.21}
{'eval_loss': 0.1207060564327016, 'eval_r2_score': 0.681561419596515, 'eval_mse': 0.1207060564327016, 'eval_runtime': 7.3507, 'eval_samples_per_second': 130.6, 'eval_steps_per_second': 2.041, 'epoch': 86.21}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1485, 'learning_rate': 2.0689655172413793e-05, 'epoch': 87.93}
{'eval_loss': 0.12146263016206807, 'eval_r2_score': 0.6795654777898638, 'eval_mse': 0.12146263016206806, 'eval_runtime': 7.3565, 'eval_samples_per_second': 130.497, 'eval_steps_per_second': 2.039, 'epoch': 87.93}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1574, 'learning_rate': 2.0114942528735632e-05, 'epoch': 89.66}
{'eval_loss': 0.12029009603890933, 'eval_r2_score': 0.6826587782644882, 'eval_mse': 0.12029009603890933, 'eval_runtime': 7.3748, 'eval_samples_per_second': 130.173, 'eval_steps_per_second': 2.034, 'epoch': 89.66}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1516, 'learning_rate': 1.9540229885057475e-05, 'epoch': 91.38}
{'eval_loss': 0.11991517220537551, 'eval_r2_score': 0.6836478770457619, 'eval_mse': 0.11991517220537551, 'eval_runtime': 7.3643, 'eval_samples_per_second': 130.359, 'eval_steps_per_second': 2.037, 'epoch': 91.38}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1582, 'learning_rate': 1.896551724137931e-05, 'epoch': 93.1}
{'eval_loss': 0.11963400012930303, 'eval_r2_score': 0.6843896462609922, 'eval_mse': 0.119634000129303, 'eval_runtime': 7.3417, 'eval_samples_per_second': 130.759, 'eval_steps_per_second': 2.043, 'epoch': 93.1}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1447, 'learning_rate': 1.839080459770115e-05, 'epoch': 94.83}
{'eval_loss': 0.11893742934363125, 'eval_r2_score': 0.6862272923468247, 'eval_mse': 0.1189374293436313, 'eval_runtime': 7.3402, 'eval_samples_per_second': 130.786, 'eval_steps_per_second': 2.044, 'epoch': 94.83}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1618, 'learning_rate': 1.781609195402299e-05, 'epoch': 96.55}
{'eval_loss': 0.11866568801247727, 'eval_r2_score': 0.6869441820066063, 'eval_mse': 0.11866568801247733, 'eval_runtime': 7.348, 'eval_samples_per_second': 130.647, 'eval_steps_per_second': 2.041, 'epoch': 96.55}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1452, 'learning_rate': 1.7241379310344828e-05, 'epoch': 98.28}
{'eval_loss': 0.11768180879965197, 'eval_r2_score': 0.6895397858153952, 'eval_mse': 0.11768180879965197, 'eval_runtime': 7.3534, 'eval_samples_per_second': 130.552, 'eval_steps_per_second': 2.04, 'epoch': 98.28}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.151, 'learning_rate': 1.6666666666666667e-05, 'epoch': 100.0}
{'eval_loss': 0.11870668564203572, 'eval_r2_score': 0.6868360248242551, 'eval_mse': 0.11870668564203578, 'eval_runtime': 7.3529, 'eval_samples_per_second': 130.56, 'eval_steps_per_second': 2.04, 'epoch': 100.0}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1449, 'learning_rate': 1.6091954022988507e-05, 'epoch': 101.72}
{'eval_loss': 0.1179957483442531, 'eval_r2_score': 0.6887115716737873, 'eval_mse': 0.1179957483442531, 'eval_runtime': 7.3388, 'eval_samples_per_second': 130.811, 'eval_steps_per_second': 2.044, 'epoch': 101.72}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1485, 'learning_rate': 1.5517241379310346e-05, 'epoch': 103.45}
{'eval_loss': 0.11780658017098654, 'eval_r2_score': 0.6892106223952901, 'eval_mse': 0.11780658017098658, 'eval_runtime': 7.3487, 'eval_samples_per_second': 130.635, 'eval_steps_per_second': 2.041, 'epoch': 103.45}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1592, 'learning_rate': 1.4942528735632185e-05, 'epoch': 105.17}
{'eval_loss': 0.11671588597257494, 'eval_r2_score': 0.6920880182978741, 'eval_mse': 0.11671588597257491, 'eval_runtime': 7.3553, 'eval_samples_per_second': 130.518, 'eval_steps_per_second': 2.039, 'epoch': 105.17}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1425, 'learning_rate': 1.4367816091954022e-05, 'epoch': 106.9}
{'eval_loss': 0.11701218337876955, 'eval_r2_score': 0.6913063464564245, 'eval_mse': 0.11701218337876958, 'eval_runtime': 7.3457, 'eval_samples_per_second': 130.688, 'eval_steps_per_second': 2.042, 'epoch': 106.9}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1512, 'learning_rate': 1.3793103448275863e-05, 'epoch': 108.62}
{'eval_loss': 0.11645893412845919, 'eval_r2_score': 0.6927658913300188, 'eval_mse': 0.11645893412845919, 'eval_runtime': 7.359, 'eval_samples_per_second': 130.452, 'eval_steps_per_second': 2.038, 'epoch': 108.62}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1445, 'learning_rate': 1.3218390804597702e-05, 'epoch': 110.34}
{'eval_loss': 0.11621632346948656, 'eval_r2_score': 0.6934059304143632, 'eval_mse': 0.11621632346948654, 'eval_runtime': 7.3387, 'eval_samples_per_second': 130.812, 'eval_steps_per_second': 2.044, 'epoch': 110.34}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1479, 'learning_rate': 1.2643678160919542e-05, 'epoch': 112.07}
{'eval_loss': 0.11632029019384481, 'eval_r2_score': 0.6931316524113176, 'eval_mse': 0.11632029019384488, 'eval_runtime': 7.3365, 'eval_samples_per_second': 130.852, 'eval_steps_per_second': 2.045, 'epoch': 112.07}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1456, 'learning_rate': 1.206896551724138e-05, 'epoch': 113.79}
{'eval_loss': 0.11584303164038275, 'eval_r2_score': 0.6943907237515748, 'eval_mse': 0.11584303164038277, 'eval_runtime': 7.337, 'eval_samples_per_second': 130.844, 'eval_steps_per_second': 2.044, 'epoch': 113.79}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1567, 'learning_rate': 1.1494252873563218e-05, 'epoch': 115.52}
{'eval_loss': 0.11578214972540728, 'eval_r2_score': 0.6945513383151686, 'eval_mse': 0.11578214972540725, 'eval_runtime': 7.3496, 'eval_samples_per_second': 130.619, 'eval_steps_per_second': 2.041, 'epoch': 115.52}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1416, 'learning_rate': 1.091954022988506e-05, 'epoch': 117.24}
{'eval_loss': 0.11497233471700771, 'eval_r2_score': 0.6966877376747811, 'eval_mse': 0.11497233471700774, 'eval_runtime': 7.383, 'eval_samples_per_second': 130.028, 'eval_steps_per_second': 2.032, 'epoch': 117.24}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1397, 'learning_rate': 1.0344827586206897e-05, 'epoch': 118.97}
{'eval_loss': 0.11586313185851482, 'eval_r2_score': 0.6943376967112014, 'eval_mse': 0.11586313185851486, 'eval_runtime': 7.3927, 'eval_samples_per_second': 129.858, 'eval_steps_per_second': 2.029, 'epoch': 118.97}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1481, 'learning_rate': 9.770114942528738e-06, 'epoch': 120.69}
{'eval_loss': 0.1151972660693431, 'eval_r2_score': 0.696094338945316, 'eval_mse': 0.1151972660693431, 'eval_runtime': 7.3513, 'eval_samples_per_second': 130.589, 'eval_steps_per_second': 2.04, 'epoch': 120.69}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1438, 'learning_rate': 9.195402298850575e-06, 'epoch': 122.41}
{'eval_loss': 0.11518134747465088, 'eval_r2_score': 0.6961363343086442, 'eval_mse': 0.11518134747465088, 'eval_runtime': 7.3459, 'eval_samples_per_second': 130.685, 'eval_steps_per_second': 2.042, 'epoch': 122.41}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1559, 'learning_rate': 8.620689655172414e-06, 'epoch': 124.14}
{'eval_loss': 0.11464817934767814, 'eval_r2_score': 0.6975429025164643, 'eval_mse': 0.11464817934767817, 'eval_runtime': 7.3425, 'eval_samples_per_second': 130.745, 'eval_steps_per_second': 2.043, 'epoch': 124.14}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1395, 'learning_rate': 8.045977011494253e-06, 'epoch': 125.86}
{'eval_loss': 0.11466171242223619, 'eval_r2_score': 0.6975072004715288, 'eval_mse': 0.11466171242223618, 'eval_runtime': 7.4032, 'eval_samples_per_second': 129.673, 'eval_steps_per_second': 2.026, 'epoch': 125.86}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1483, 'learning_rate': 7.4712643678160925e-06, 'epoch': 127.59}
{'eval_loss': 0.11456456192220726, 'eval_r2_score': 0.6977634963710799, 'eval_mse': 0.11456456192220726, 'eval_runtime': 7.3437, 'eval_samples_per_second': 130.725, 'eval_steps_per_second': 2.043, 'epoch': 127.59}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1405, 'learning_rate': 6.896551724137932e-06, 'epoch': 129.31}
{'eval_loss': 0.11415878438324115, 'eval_r2_score': 0.6988339913179513, 'eval_mse': 0.11415878438324117, 'eval_runtime': 7.3573, 'eval_samples_per_second': 130.483, 'eval_steps_per_second': 2.039, 'epoch': 129.31}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1444, 'learning_rate': 6.321839080459771e-06, 'epoch': 131.03}
{'eval_loss': 0.11446240951091641, 'eval_r2_score': 0.698032987975707, 'eval_mse': 0.11446240951091644, 'eval_runtime': 7.3463, 'eval_samples_per_second': 130.678, 'eval_steps_per_second': 2.042, 'epoch': 131.03}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1398, 'learning_rate': 5.747126436781609e-06, 'epoch': 132.76}
{'eval_loss': 0.11433442156840691, 'eval_r2_score': 0.6983706371370333, 'eval_mse': 0.1143344215684069, 'eval_runtime': 7.3505, 'eval_samples_per_second': 130.603, 'eval_steps_per_second': 2.041, 'epoch': 132.76}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1566, 'learning_rate': 5.172413793103448e-06, 'epoch': 134.48}
{'eval_loss': 0.11426364705457294, 'eval_r2_score': 0.6985573496880029, 'eval_mse': 0.11426364705457295, 'eval_runtime': 7.3507, 'eval_samples_per_second': 130.6, 'eval_steps_per_second': 2.041, 'epoch': 134.48}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1396, 'learning_rate': 4.5977011494252875e-06, 'epoch': 136.21}
{'eval_loss': 0.11389771870585304, 'eval_r2_score': 0.6995227171876919, 'eval_mse': 0.11389771870585304, 'eval_runtime': 7.3626, 'eval_samples_per_second': 130.389, 'eval_steps_per_second': 2.037, 'epoch': 136.21}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.138, 'learning_rate': 4.022988505747127e-06, 'epoch': 137.93}
{'eval_loss': 0.11418761616978473, 'eval_r2_score': 0.6987579292424536, 'eval_mse': 0.11418761616978472, 'eval_runtime': 7.3446, 'eval_samples_per_second': 130.708, 'eval_steps_per_second': 2.042, 'epoch': 137.93}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1469, 'learning_rate': 3.448275862068966e-06, 'epoch': 139.66}
{'eval_loss': 0.11397167626073763, 'eval_r2_score': 0.6993276073524144, 'eval_mse': 0.11397167626073765, 'eval_runtime': 7.3614, 'eval_samples_per_second': 130.411, 'eval_steps_per_second': 2.038, 'epoch': 139.66}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.142, 'learning_rate': 2.8735632183908046e-06, 'epoch': 141.38}
{'eval_loss': 0.11390397767967834, 'eval_r2_score': 0.699506205184909, 'eval_mse': 0.11390397767967833, 'eval_runtime': 7.3473, 'eval_samples_per_second': 130.66, 'eval_steps_per_second': 2.042, 'epoch': 141.38}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1489, 'learning_rate': 2.2988505747126437e-06, 'epoch': 143.1}
{'eval_loss': 0.11387631237029575, 'eval_r2_score': 0.6995791899390016, 'eval_mse': 0.11387631237029575, 'eval_runtime': 7.3503, 'eval_samples_per_second': 130.607, 'eval_steps_per_second': 2.041, 'epoch': 143.1}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1365, 'learning_rate': 1.724137931034483e-06, 'epoch': 144.83}
{'eval_loss': 0.11380261728844747, 'eval_r2_score': 0.6997736073356097, 'eval_mse': 0.1138026172884475, 'eval_runtime': 7.3648, 'eval_samples_per_second': 130.35, 'eval_steps_per_second': 2.037, 'epoch': 144.83}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1534, 'learning_rate': 1.1494252873563219e-06, 'epoch': 146.55}
{'eval_loss': 0.11380474700188042, 'eval_r2_score': 0.6997679888692104, 'eval_mse': 0.11380474700188044, 'eval_runtime': 7.3556, 'eval_samples_per_second': 130.512, 'eval_steps_per_second': 2.039, 'epoch': 146.55}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.138, 'learning_rate': 5.747126436781609e-07, 'epoch': 148.28}
{'eval_loss': 0.11375724997932123, 'eval_r2_score': 0.6998932923120047, 'eval_mse': 0.11375724997932125, 'eval_runtime': 7.3637, 'eval_samples_per_second': 130.369, 'eval_steps_per_second': 2.037, 'epoch': 148.28}


  "inputs_embeds": torch.tensor(self.tokenized_inputs[idx]),
  "labels_ids": torch.tensor(self.labels[idx]),


{'loss': 0.1435, 'learning_rate': 0.0, 'epoch': 150.0}
{'eval_loss': 0.11378440187284028, 'eval_r2_score': 0.6998216620170304, 'eval_mse': 0.11378440187284025, 'eval_runtime': 7.3492, 'eval_samples_per_second': 130.626, 'eval_steps_per_second': 2.041, 'epoch': 150.0}
{'train_runtime': 5075.1837, 'train_samples_per_second': 54.855, 'train_steps_per_second': 0.857, 'train_loss': 0.18528716438118067, 'epoch': 150.0}


TrainOutput(global_step=4350, training_loss=0.18528716438118067, metrics={'train_runtime': 5075.1837, 'train_samples_per_second': 54.855, 'train_steps_per_second': 0.857, 'train_loss': 0.18528716438118067, 'epoch': 150.0})

In [None]:
model = reservoirtransformers.ReservoirTTimeSeries.from_pretrained("results_task1/checkpoint-450", config=configuration).to("cuda", dtype=float)


In [10]:
cnt = 0
ln = len(X_test)
y_pred = []
y_test1 = []
while cnt < ln:
    #print(cnt, ln)
    input_ids = torch.stack(X_test[cnt:cnt+batch], dim=0)
    #y_test1 = y_test1 + [k.detach().numpy().flatten() for k in y_test[cnt:cnt+64]]
    
    output = model(inputs_embeds = input_ids.to(model.device))['logits']
    y_pred = y_pred + list(output.cpu().detach().numpy().reshape(output.size(0), -1))
    #y_test = y_test + labels_ids
    
    cnt=cnt+batch


In [11]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
y_test1 = [i.detach().numpy().flatten() for i in y_test]

mse = mean_squared_error(y_test1, y_pred)
mse

0.14957888019608853