# TREC-6: Grid Search

## Libraries

In [1]:
# !pip install transformers==4.8.2
# !pip install datasets==1.7.0
# !pip install ax-platform==0.1.20
# !pip install ipywidgets
# !jupyter nbextension enable --py widgetsnbextension

In [2]:
# Put the directory three levels above the working directory at the front of
# the import path (presumably the root of a local esntorch checkout).
# Comment this cell out if the library is pip installed.
import os
import sys

sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, os.pardir, os.pardir)))

In [3]:
import io
import re
import pickle

from tqdm.notebook import tqdm

from timeit import default_timer as timer

import random
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

from itertools import product

from datasets import load_dataset, Dataset, concatenate_datasets
from transformers import AutoTokenizer
from transformers import BertModel
from transformers.data.data_collator import DataCollatorWithPadding

import esntorch.core.reservoir as res
import esntorch.core.learning_algo as la
import esntorch.core.pooling_strategy as ps
import esntorch.core.esn as esn

In [4]:
# Turn off Jedi for the IPython tab-completer.
%config Completer.use_jedi = False
# Load the autoreload extension and reload all modules before each cell runs,
# so edits to imported library code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [5]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [6]:
# Default random seed for reproducible runs.
SEED = 42

## Global variables

In [5]:
# Locations used by the notebook -- adapt them to your own environment.
CACHE_DIR = "/huggingface_datasets"   # cache folder handed to HuggingFace `load_dataset`
RESULTS_PATH = "/results/ESN_v2"      # path of your result folder
RESULTS_FILE = "trec-6_params.pkl"    # presumably the pickle file holding the grid-search results

## Dataset

In [8]:
# rename correct column as 'labels': depends on the dataset you load

def tokenize(sample):
    """Run the global `tokenizer` over the ``'text'`` field of `sample`.

    Truncation is enabled, no padding is applied, and the tokenizer is asked
    to return the length of each entry. The tokenizer output is returned as is.
    """
    return tokenizer(
        sample['text'],
        truncation=True,
        padding=False,
        return_length=True,
    )
    
def load_and_enrich_dataset(dataset_name, split, cache_dir, label_column='label-coarse'):
    """
    Load a dataset from the datasets library of HuggingFace, tokenize it and add lengths.

    Parameters
    ----------
    dataset_name : str
        Name forwarded to `load_dataset`.
    split : str
        Split forwarded to `load_dataset` (e.g. 'train', 'test').
    cache_dir : str
        Cache folder forwarded to `load_dataset`.
    label_column : str, optional
        Column renamed to 'labels'. Defaults to 'label-coarse'; use 'label-fine'
        for fine-grained labels.

    Returns
    -------
    The dataset after tokenization, with its format set to torch tensors for the
    columns 'input_ids', 'attention_mask', 'labels' and 'lengths'.
    """

    # Load dataset, using the caller-supplied cache folder rather than the
    # module-level CACHE_DIR.
    dataset = load_dataset(dataset_name, split=split, cache_dir=cache_dir)

    # Rename label column for tokenization purposes
    dataset = dataset.rename_column(label_column, 'labels')

    # Tokenize data; `tokenize` requests the length, which is renamed to 'lengths'
    dataset = dataset.map(tokenize, batched=True)
    dataset = dataset.rename_column('length', 'lengths')
    dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels', 'lengths'])

    return dataset

In [None]:
# Tokenizer used by `tokenize` and by the padding collator below
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# Batch size shared by all dataloaders
BATCH_SIZE = 256

full_train_dataset = load_and_enrich_dataset('trec', split='train', cache_dir=CACHE_DIR)

# Select 20% of the dataset.
# The shuffle is seeded so that the same subset is drawn on every run.
full_train_dataset = full_train_dataset.train_test_split(train_size=0.2, shuffle=True, seed=SEED)['train']

# Build mini train and val sets (80% / 20% of the subset, seeded as well).
# Both are sorted by length so that each batch groups sentences of similar length.
train_val_datasets = full_train_dataset.train_test_split(train_size=0.8, shuffle=True, seed=SEED)
train_dataset = train_val_datasets['train'].sort("lengths")
val_dataset = train_val_datasets['test'].sort("lengths")

test_dataset = load_and_enrich_dataset('trec', split='test', cache_dir=CACHE_DIR).sort("lengths")

dataset_d = {
    'full_train': full_train_dataset,
    'train': train_dataset,
    'val': val_dataset,
    'test': test_dataset
    }

# One collator is enough: it only pads each batch with the tokenizer's settings.
collator = DataCollatorWithPadding(tokenizer)

dataloader_d = {
    name: torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, collate_fn=collator)
    for name, dataset in dataset_d.items()
}

In [10]:
dataset_d

{'full_train': Dataset({
     features: ['attention_mask', 'input_ids', 'label-fine', 'labels', 'lengths', 'text', 'token_type_ids'],
     num_rows: 1090
 }),
 'train': Dataset({
     features: ['attention_mask', 'input_ids', 'label-fine', 'labels', 'lengths', 'text', 'token_type_ids'],
     num_rows: 872
 }),
 'val': Dataset({
     features: ['attention_mask', 'input_ids', 'label-fine', 'labels', 'lengths', 'text', 'token_type_ids'],
     num_rows: 218
 }),
 'test': Dataset({
     features: ['attention_mask', 'input_ids', 'label-fine', 'labels', 'lengths', 'text', 'token_type_ids'],
     num_rows: 500
 })}

## Grid Search

In [11]:
# Hyper-parameter grid: every combination of the values below is evaluated.
params_d = dict(
    reservoir_dim=[1000],              # we fix the reservoir dim
    spectral_radius=[0.5, 1.0, 1.5],
    leaking_rate=[0.1, 0.5, 0.9],
    input_scaling=[0.1, 1.0],
    bias_scaling=[0.0],
    sparsity=[0.0, 0.99],
    activation_function=['relu'],
    alpha=[0.1, 1.0, 10.0],            # passed to la.RidgeRegression in the grid-search loop
)

In [12]:
# Cartesian product of the grid: one {name: value} dict per combination,
# with the keys in the same order as in params_d.
params_l = [
    dict(zip(params_d.keys(), combination))
    for combination in product(*params_d.values())
]
params_l

[{'reservoir_dim': 1000,
  'spectral_radius': 0.5,
  'leaking_rate': 0.1,
  'input_scaling': 0.1,
  'bias_scaling': 0.0,
  'sparsity': 0.0,
  'activation_function': 'relu',
  'alpha': 0.1},
 {'reservoir_dim': 1000,
  'spectral_radius': 0.5,
  'leaking_rate': 0.1,
  'input_scaling': 0.1,
  'bias_scaling': 0.0,
  'sparsity': 0.0,
  'activation_function': 'relu',
  'alpha': 1.0},
 {'reservoir_dim': 1000,
  'spectral_radius': 0.5,
  'leaking_rate': 0.1,
  'input_scaling': 0.1,
  'bias_scaling': 0.0,
  'sparsity': 0.0,
  'activation_function': 'relu',
  'alpha': 10.0},
 {'reservoir_dim': 1000,
  'spectral_radius': 0.5,
  'leaking_rate': 0.1,
  'input_scaling': 0.1,
  'bias_scaling': 0.0,
  'sparsity': 0.99,
  'activation_function': 'relu',
  'alpha': 0.1},
 {'reservoir_dim': 1000,
  'spectral_radius': 0.5,
  'leaking_rate': 0.1,
  'input_scaling': 0.1,
  'bias_scaling': 0.0,
  'sparsity': 0.99,
  'activation_function': 'relu',
  'alpha': 1.0},
 {'reservoir_dim': 1000,
  'spectral_radius': 0

In [13]:
len(params_l)

108

In [14]:
# We store the results as follows (one accuracy and one training time per seed):
# esn_results_l =
# [
# {'params': params_1, 'accuracy': [...], 'time': [...]},
# {'params': params_2, 'accuracy': [...], 'time': [...]},
# {'params': params_3, 'accuracy': [...], 'time': [...]},
# ...
# ]

In [15]:
# Grid search: for every hyper-parameter combination in params_l, an ESN is
# built, warmed up, fitted on the mini train set and scored on the mini
# validation set, once per seed. For each combination, esn_results_l receives a
# dict holding the parameters, the validation accuracies and the training times.

# Seeds over which every hyper-parameter combination is repeated
SEEDS = [42, 127, 74684, 888, 7716843]
# Number of training sentences fed to the ESN during warm-up
NB_WARM_UP_SENTENCES = 10

esn_results_l = []

# The warm-up sentences depend neither on the hyper-parameters nor on the seed,
# so a single loader is built up front. batch_size=1 and the default sequential
# order feed the first sentences of the train set one at a time.
warm_up_dataloader = torch.utils.data.DataLoader(
    dataset_d["train"].select(list(range(NB_WARM_UP_SENTENCES))),
    batch_size=1,
    collate_fn=DataCollatorWithPadding(tokenizer))

# loop over params
for params in tqdm(params_l):

    esn_d = {}
    esn_d['params'] = params
    esn_d['accuracy'] = []
    esn_d['time'] = []

    # loop over seeds
    for seed in tqdm(SEEDS):

        torch.manual_seed(seed)
        random.seed(seed)
        np.random.seed(seed)

        # ESN parameters; entries set to None are filled in from the grid below
        esn_params = {
                    'embedding': 'bert-base-uncased',
                    'distribution' : 'gaussian',              # uniform, gaussian
                    'input_dim' : 768,                        # dim of BERT encoding!
                    'reservoir_dim' : None,
                    'bias_scaling' : None,
                    'sparsity' : None,
                    'spectral_radius' : None,
                    'leaking_rate': None,
                    'activation_function' : None,
                    'input_scaling' : None,
                    'mean' : 0.0,
                    'std' : 1.0,
                    'pooling_strategy' : 'mean',
                    'bidirectional' : False,
                    'device' : device,
                    'mode' : 'esn',                           # only the 'esn' mode is evaluated here
                    'seed' : seed
                     }

        # Copy over the grid values the ESN constructor knows about.
        # 'alpha' is not an ESN argument: it goes to the ridge regression below.
        esn_params.update({k: v for k, v in params.items() if k in esn_params})

        # Instantiate the ESN
        ESN = esn.EchoStateNetwork(**esn_params)
        ESN.learning_algo = la.RidgeRegression(alpha=params['alpha'])
        ESN = ESN.to(device)

        # Warm up the ESN, one sentence at a time
        for sentence in warm_up_dataloader:
            ESN.warm_up(sentence)

        # train on mini train set
        # CUDA kernels run asynchronously: synchronize around the timed region
        # so the wall-clock measurement covers the whole fit.
        if device.type == 'cuda':
            torch.cuda.synchronize()
        t0 = timer()
        ESN.fit(dataloader_d["train"])
        if device.type == 'cuda':
            torch.cuda.synchronize()
        t1 = timer()
        train_time = t1 - t0
        print(f'training time: {train_time}')

        # acc on mini validation set
        val_pred, val_acc = ESN.predict(dataloader_d["val"], verbose=False)
        print(f'accuracy: {val_acc.item()}')

        # save results
        esn_d['time'].append(train_time)
        esn_d['accuracy'].append(val_acc.item())
        print(esn_d)

        # clean objects: drop the model and release cached GPU memory before the next run
        del ESN
        torch.cuda.empty_cache()

    esn_results_l.append(esn_d)

  0%|          | 0/108 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8782334187999368
accuracy: 69.72476959228516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [69.72476959228516], 'time': [0.8782334187999368]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7658956442028284
accuracy: 73.85321044921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [69.72476959228516, 73.85321044921875], 'time': [0.8782334187999368, 0.7658956442028284]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8490239018574357
accuracy: 73.85321044921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [69.72476959228516, 73.85321044921875, 73.85321044921875], 'time': [0.8782334187999368, 0.7658956442028284, 0.8490239018574357]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8380396263673902
accuracy: 75.22935485839844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [69.72476959228516, 73.85321044921875, 73.85321044921875, 75.22935485839844], 'time': [0.8782334187999368, 0.7658956442028284, 0.8490239018574357, 0.8380396263673902]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7433061115443707
accuracy: 74.77063751220703
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [69.72476959228516, 73.85321044921875, 73.85321044921875, 75.22935485839844, 74.77063751220703], 'time': [0.8782334187999368, 0.7658956442028284, 0.8490239018574357, 0.8380396263673902, 0.7433061115443707]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7118852585554123
accuracy: 77.06421661376953
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [77.06421661376953], 'time': [0.7118852585554123]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7155737029388547
accuracy: 80.73394012451172
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [77.06421661376953, 80.73394012451172], 'time': [0.7118852585554123, 0.7155737029388547]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7219203375279903
accuracy: 79.81651306152344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [77.06421661376953, 80.73394012451172, 79.81651306152344], 'time': [0.7118852585554123, 0.7155737029388547, 0.7219203375279903]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7177853733301163
accuracy: 78.89907836914062
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [77.06421661376953, 80.73394012451172, 79.81651306152344, 78.89907836914062], 'time': [0.7118852585554123, 0.7155737029388547, 0.7219203375279903, 0.7177853733301163]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7740069339051843
accuracy: 83.02751922607422
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [77.06421661376953, 80.73394012451172, 79.81651306152344, 78.89907836914062, 83.02751922607422], 'time': [0.7118852585554123, 0.7155737029388547, 0.7219203375279903, 0.7177853733301163, 0.7740069339051843]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.821940079331398
accuracy: 79.35779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [79.35779571533203], 'time': [0.821940079331398]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7737212553620338
accuracy: 80.27522277832031
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [79.35779571533203, 80.27522277832031], 'time': [0.821940079331398, 0.7737212553620338]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7324529699981213
accuracy: 79.81651306152344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [79.35779571533203, 80.27522277832031, 79.81651306152344], 'time': [0.821940079331398, 0.7737212553620338, 0.7324529699981213]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7323019998148084
accuracy: 81.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [79.35779571533203, 80.27522277832031, 79.81651306152344, 81.65137481689453], 'time': [0.821940079331398, 0.7737212553620338, 0.7324529699981213, 0.7323019998148084]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7000826206058264
accuracy: 81.19265747070312
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [79.35779571533203, 80.27522277832031, 79.81651306152344, 81.65137481689453, 81.19265747070312], 'time': [0.821940079331398, 0.7737212553620338, 0.7324529699981213, 0.7323019998148084, 0.7000826206058264]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7401987509801984
accuracy: 72.93577575683594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [72.93577575683594], 'time': [0.7401987509801984]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7301510404795408
accuracy: 72.47705841064453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [72.93577575683594, 72.47705841064453], 'time': [0.7401987509801984, 0.7301510404795408]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7480081086978316
accuracy: 72.47705841064453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [72.93577575683594, 72.47705841064453, 72.47705841064453], 'time': [0.7401987509801984, 0.7301510404795408, 0.7480081086978316]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7339502852410078
accuracy: 72.93577575683594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [72.93577575683594, 72.47705841064453, 72.47705841064453, 72.93577575683594], 'time': [0.7401987509801984, 0.7301510404795408, 0.7480081086978316, 0.7339502852410078]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.748829715885222
accuracy: 76.14678192138672
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [72.93577575683594, 72.47705841064453, 72.47705841064453, 72.93577575683594, 76.14678192138672], 'time': [0.7401987509801984, 0.7301510404795408, 0.7480081086978316, 0.7339502852410078, 0.748829715885222]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7353564882650971
accuracy: 81.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [81.65137481689453], 'time': [0.7353564882650971]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7302601588889956
accuracy: 82.56880187988281
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [81.65137481689453, 82.56880187988281], 'time': [0.7353564882650971, 0.7302601588889956]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7490786937996745
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [81.65137481689453, 82.56880187988281, 78.44036102294922], 'time': [0.7353564882650971, 0.7302601588889956, 0.7490786937996745]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8140805326402187
accuracy: 81.19265747070312
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [81.65137481689453, 82.56880187988281, 78.44036102294922, 81.19265747070312], 'time': [0.7353564882650971, 0.7302601588889956, 0.7490786937996745, 0.8140805326402187]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8070934489369392
accuracy: 82.1100845336914
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [81.65137481689453, 82.56880187988281, 78.44036102294922, 81.19265747070312, 82.1100845336914], 'time': [0.7353564882650971, 0.7302601588889956, 0.7490786937996745, 0.8140805326402187, 0.8070934489369392]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7613432332873344
accuracy: 79.81651306152344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [79.81651306152344], 'time': [0.7613432332873344]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8762675561010838
accuracy: 81.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [79.81651306152344, 81.65137481689453], 'time': [0.7613432332873344, 0.8762675561010838]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7305963402613997
accuracy: 79.81651306152344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [79.81651306152344, 81.65137481689453, 79.81651306152344], 'time': [0.7613432332873344, 0.8762675561010838, 0.7305963402613997]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7392765246331692
accuracy: 80.73394012451172
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [79.81651306152344, 81.65137481689453, 79.81651306152344, 80.73394012451172], 'time': [0.7613432332873344, 0.8762675561010838, 0.7305963402613997, 0.7392765246331692]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7669543335214257
accuracy: 82.1100845336914
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [79.81651306152344, 81.65137481689453, 79.81651306152344, 80.73394012451172, 82.1100845336914], 'time': [0.7613432332873344, 0.8762675561010838, 0.7305963402613997, 0.7392765246331692, 0.7669543335214257]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7432306641712785
accuracy: 22.935779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [22.935779571533203], 'time': [0.7432306641712785]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7547042090445757
accuracy: 31.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [22.935779571533203, 31.65137481689453], 'time': [0.7432306641712785, 0.7547042090445757]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7456621648743749
accuracy: 33.027523040771484
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [22.935779571533203, 31.65137481689453, 33.027523040771484], 'time': [0.7432306641712785, 0.7547042090445757, 0.7456621648743749]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7802872620522976
accuracy: 37.15596008300781
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [22.935779571533203, 31.65137481689453, 33.027523040771484, 37.15596008300781], 'time': [0.7432306641712785, 0.7547042090445757, 0.7456621648743749, 0.7802872620522976]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7401964738965034
accuracy: 27.522933959960938
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [22.935779571533203, 31.65137481689453, 33.027523040771484, 37.15596008300781, 27.522933959960938], 'time': [0.7432306641712785, 0.7547042090445757, 0.7456621648743749, 0.7802872620522976, 0.7401964738965034]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7601708173751831
accuracy: 56.88072967529297
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [56.88072967529297], 'time': [0.7601708173751831]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8129513831809163
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [56.88072967529297, 60.091739654541016], 'time': [0.7601708173751831, 0.8129513831809163]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8616238860413432
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [56.88072967529297, 60.091739654541016, 60.091739654541016], 'time': [0.7601708173751831, 0.8129513831809163, 0.8616238860413432]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8040235647931695
accuracy: 50.45871353149414
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [56.88072967529297, 60.091739654541016, 60.091739654541016, 50.45871353149414], 'time': [0.7601708173751831, 0.8129513831809163, 0.8616238860413432, 0.8040235647931695]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7177161434665322
accuracy: 62.844032287597656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [56.88072967529297, 60.091739654541016, 60.091739654541016, 50.45871353149414, 62.844032287597656], 'time': [0.7601708173751831, 0.8129513831809163, 0.8616238860413432, 0.8040235647931695, 0.7177161434665322]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7982775717973709
accuracy: 72.01834869384766
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [72.01834869384766], 'time': [0.7982775717973709]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7523916764184833
accuracy: 72.93577575683594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [72.01834869384766, 72.93577575683594], 'time': [0.7982775717973709, 0.7523916764184833]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7561097694560885
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [72.01834869384766, 72.93577575683594, 73.39449310302734], 'time': [0.7982775717973709, 0.7523916764184833, 0.7561097694560885]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7258562594652176
accuracy: 75.22935485839844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [72.01834869384766, 72.93577575683594, 73.39449310302734, 75.22935485839844], 'time': [0.7982775717973709, 0.7523916764184833, 0.7561097694560885, 0.7258562594652176]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7302361596375704
accuracy: 74.77063751220703
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [72.01834869384766, 72.93577575683594, 73.39449310302734, 75.22935485839844, 74.77063751220703], 'time': [0.7982775717973709, 0.7523916764184833, 0.7561097694560885, 0.7258562594652176, 0.7302361596375704]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7161510139703751
accuracy: 32.56880569458008
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [32.56880569458008], 'time': [0.7161510139703751]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.74417778942734
accuracy: 44.03669357299805
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [32.56880569458008, 44.03669357299805], 'time': [0.7161510139703751, 0.74417778942734]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7536350302398205
accuracy: 38.99082565307617
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [32.56880569458008, 44.03669357299805, 38.99082565307617], 'time': [0.7161510139703751, 0.74417778942734, 0.7536350302398205]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7841410394757986
accuracy: 33.027523040771484
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [32.56880569458008, 44.03669357299805, 38.99082565307617, 33.027523040771484], 'time': [0.7161510139703751, 0.74417778942734, 0.7536350302398205, 0.7841410394757986]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8489905092865229
accuracy: 18.348623275756836
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [32.56880569458008, 44.03669357299805, 38.99082565307617, 33.027523040771484, 18.348623275756836], 'time': [0.7161510139703751, 0.74417778942734, 0.7536350302398205, 0.7841410394757986, 0.8489905092865229]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7954370388761163
accuracy: 57.79816436767578
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [57.79816436767578], 'time': [0.7954370388761163]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7815007166936994
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [57.79816436767578, 60.091739654541016], 'time': [0.7954370388761163, 0.7815007166936994]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7110813651233912
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [57.79816436767578, 60.091739654541016, 60.55045700073242], 'time': [0.7954370388761163, 0.7815007166936994, 0.7110813651233912]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7330715972930193
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [57.79816436767578, 60.091739654541016, 60.55045700073242, 61.46788787841797], 'time': [0.7954370388761163, 0.7815007166936994, 0.7110813651233912, 0.7330715972930193]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7423525480553508
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [57.79816436767578, 60.091739654541016, 60.55045700073242, 61.46788787841797, 60.091739654541016], 'time': [0.7954370388761163, 0.7815007166936994, 0.7110813651233912, 0.7330715972930193, 0.7423525480553508]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7908410225063562
accuracy: 72.47705841064453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [72.47705841064453], 'time': [0.7908410225063562]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7458233740180731
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [72.47705841064453, 73.39449310302734], 'time': [0.7908410225063562, 0.7458233740180731]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7970334533601999
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [72.47705841064453, 73.39449310302734, 73.39449310302734], 'time': [0.7908410225063562, 0.7458233740180731, 0.7970334533601999]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.757041553966701
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [72.47705841064453, 73.39449310302734, 73.39449310302734, 73.39449310302734], 'time': [0.7908410225063562, 0.7458233740180731, 0.7970334533601999, 0.757041553966701]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6957942908629775
accuracy: 77.06421661376953
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [72.47705841064453, 73.39449310302734, 73.39449310302734, 73.39449310302734, 77.06421661376953], 'time': [0.7908410225063562, 0.7458233740180731, 0.7970334533601999, 0.757041553966701, 0.6957942908629775]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7711579138413072
accuracy: 58.71559524536133
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [58.71559524536133], 'time': [0.7711579138413072]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7440908867865801
accuracy: 62.844032287597656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [58.71559524536133, 62.844032287597656], 'time': [0.7711579138413072, 0.7440908867865801]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7186960503458977
accuracy: 57.79816436767578
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [58.71559524536133, 62.844032287597656, 57.79816436767578], 'time': [0.7711579138413072, 0.7440908867865801, 0.7186960503458977]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6869473084807396
accuracy: 59.633026123046875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [58.71559524536133, 62.844032287597656, 57.79816436767578, 59.633026123046875], 'time': [0.7711579138413072, 0.7440908867865801, 0.7186960503458977, 0.6869473084807396]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7704929169267416
accuracy: 61.00917053222656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [58.71559524536133, 62.844032287597656, 57.79816436767578, 59.633026123046875, 61.00917053222656], 'time': [0.7711579138413072, 0.7440908867865801, 0.7186960503458977, 0.6869473084807396, 0.7704929169267416]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.778518121689558
accuracy: 69.72476959228516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [69.72476959228516], 'time': [0.778518121689558]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8003261014819145
accuracy: 70.64219665527344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [69.72476959228516, 70.64219665527344], 'time': [0.778518121689558, 0.8003261014819145]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7287426413968205
accuracy: 72.01834869384766
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [69.72476959228516, 70.64219665527344, 72.01834869384766], 'time': [0.778518121689558, 0.8003261014819145, 0.7287426413968205]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7752078380435705
accuracy: 73.85321044921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [69.72476959228516, 70.64219665527344, 72.01834869384766, 73.85321044921875], 'time': [0.778518121689558, 0.8003261014819145, 0.7287426413968205, 0.7752078380435705]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7041712114587426
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [69.72476959228516, 70.64219665527344, 72.01834869384766, 73.85321044921875, 73.39449310302734], 'time': [0.778518121689558, 0.8003261014819145, 0.7287426413968205, 0.7752078380435705, 0.7041712114587426]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8058241177350283
accuracy: 76.60549926757812
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.60549926757812], 'time': [0.8058241177350283]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8542102817445993
accuracy: 76.60549926757812
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.60549926757812, 76.60549926757812], 'time': [0.8058241177350283, 0.8542102817445993]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6955093592405319
accuracy: 77.98165130615234
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.60549926757812, 76.60549926757812, 77.98165130615234], 'time': [0.8058241177350283, 0.8542102817445993, 0.6955093592405319]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7124064732342958
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.60549926757812, 76.60549926757812, 77.98165130615234, 78.44036102294922], 'time': [0.8058241177350283, 0.8542102817445993, 0.6955093592405319, 0.7124064732342958]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7425152882933617
accuracy: 78.89907836914062
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.60549926757812, 76.60549926757812, 77.98165130615234, 78.44036102294922, 78.89907836914062], 'time': [0.8058241177350283, 0.8542102817445993, 0.6955093592405319, 0.7124064732342958, 0.7425152882933617]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7127053458243608
accuracy: 61.92660140991211
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.92660140991211], 'time': [0.7127053458243608]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7284755995497108
accuracy: 63.76146697998047
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.92660140991211, 63.76146697998047], 'time': [0.7127053458243608, 0.7284755995497108]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8209880972281098
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.92660140991211, 63.76146697998047, 60.55045700073242], 'time': [0.7127053458243608, 0.7284755995497108, 0.8209880972281098]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7283199755474925
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.92660140991211, 63.76146697998047, 60.55045700073242, 61.46788787841797], 'time': [0.7127053458243608, 0.7284755995497108, 0.8209880972281098, 0.7283199755474925]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7525957627221942
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.92660140991211, 63.76146697998047, 60.55045700073242, 61.46788787841797, 61.46788787841797], 'time': [0.7127053458243608, 0.7284755995497108, 0.8209880972281098, 0.7283199755474925, 0.7525957627221942]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7059010611847043
accuracy: 74.77063751220703
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [74.77063751220703], 'time': [0.7059010611847043]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8111081393435597
accuracy: 73.85321044921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [74.77063751220703, 73.85321044921875], 'time': [0.7059010611847043, 0.8111081393435597]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8058845493942499
accuracy: 72.93577575683594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [74.77063751220703, 73.85321044921875, 72.93577575683594], 'time': [0.7059010611847043, 0.8111081393435597, 0.8058845493942499]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7680352795869112
accuracy: 73.85321044921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [74.77063751220703, 73.85321044921875, 72.93577575683594, 73.85321044921875], 'time': [0.7059010611847043, 0.8111081393435597, 0.8058845493942499, 0.7680352795869112]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7551282374188304
accuracy: 72.93577575683594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [74.77063751220703, 73.85321044921875, 72.93577575683594, 73.85321044921875, 72.93577575683594], 'time': [0.7059010611847043, 0.8111081393435597, 0.8058845493942499, 0.7680352795869112, 0.7551282374188304]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7956925500184298
accuracy: 76.14678192138672
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672], 'time': [0.7956925500184298]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7378737116232514
accuracy: 77.52293395996094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 77.52293395996094], 'time': [0.7956925500184298, 0.7378737116232514]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7626197682693601
accuracy: 79.81651306152344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 77.52293395996094, 79.81651306152344], 'time': [0.7956925500184298, 0.7378737116232514, 0.7626197682693601]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7142642550170422
accuracy: 78.89907836914062
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 77.52293395996094, 79.81651306152344, 78.89907836914062], 'time': [0.7956925500184298, 0.7378737116232514, 0.7626197682693601, 0.7142642550170422]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7259165998548269
accuracy: 79.35779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 77.52293395996094, 79.81651306152344, 78.89907836914062, 79.35779571533203], 'time': [0.7956925500184298, 0.7378737116232514, 0.7626197682693601, 0.7142642550170422, 0.7259165998548269]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.71222959831357
accuracy: 30.27522850036621
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [30.27522850036621], 'time': [0.71222959831357]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7256820490583777
accuracy: 29.357797622680664
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [30.27522850036621, 29.357797622680664], 'time': [0.71222959831357, 0.7256820490583777]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7032060669735074
accuracy: 33.027523040771484
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [30.27522850036621, 29.357797622680664, 33.027523040771484], 'time': [0.71222959831357, 0.7256820490583777, 0.7032060669735074]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8660600492730737
accuracy: 23.85321044921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [30.27522850036621, 29.357797622680664, 33.027523040771484, 23.85321044921875], 'time': [0.71222959831357, 0.7256820490583777, 0.7032060669735074, 0.8660600492730737]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7863375758752227
accuracy: 25.688072204589844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [30.27522850036621, 29.357797622680664, 33.027523040771484, 23.85321044921875, 25.688072204589844], 'time': [0.71222959831357, 0.7256820490583777, 0.7032060669735074, 0.8660600492730737, 0.7863375758752227]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.778264912776649
accuracy: 50.91743087768555
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [50.91743087768555], 'time': [0.778264912776649]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7349380124360323
accuracy: 49.541282653808594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [50.91743087768555, 49.541282653808594], 'time': [0.778264912776649, 0.7349380124360323]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.804237793199718
accuracy: 47.247703552246094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [50.91743087768555, 49.541282653808594, 47.247703552246094], 'time': [0.778264912776649, 0.7349380124360323, 0.804237793199718]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6961575970053673
accuracy: 44.03669357299805
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [50.91743087768555, 49.541282653808594, 47.247703552246094, 44.03669357299805], 'time': [0.778264912776649, 0.7349380124360323, 0.804237793199718, 0.6961575970053673]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.760176345705986
accuracy: 51.834861755371094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [50.91743087768555, 49.541282653808594, 47.247703552246094, 44.03669357299805, 51.834861755371094], 'time': [0.778264912776649, 0.7349380124360323, 0.804237793199718, 0.6961575970053673, 0.760176345705986]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7375969821587205
accuracy: 57.339447021484375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [57.339447021484375], 'time': [0.7375969821587205]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7843312453478575
accuracy: 62.385318756103516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [57.339447021484375, 62.385318756103516], 'time': [0.7375969821587205, 0.7843312453478575]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7531557139009237
accuracy: 58.71559524536133
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [57.339447021484375, 62.385318756103516, 58.71559524536133], 'time': [0.7375969821587205, 0.7843312453478575, 0.7531557139009237]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7611087281256914
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [57.339447021484375, 62.385318756103516, 58.71559524536133, 60.091739654541016], 'time': [0.7375969821587205, 0.7843312453478575, 0.7531557139009237, 0.7611087281256914]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.756177450530231
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [57.339447021484375, 62.385318756103516, 58.71559524536133, 60.091739654541016, 60.55045700073242], 'time': [0.7375969821587205, 0.7843312453478575, 0.7531557139009237, 0.7611087281256914, 0.756177450530231]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8459148053079844
accuracy: 36.238529205322266
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [36.238529205322266], 'time': [0.8459148053079844]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7912218747660518
accuracy: 40.36697006225586
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [36.238529205322266, 40.36697006225586], 'time': [0.8459148053079844, 0.7912218747660518]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.722223105840385
accuracy: 35.32109832763672
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [36.238529205322266, 40.36697006225586, 35.32109832763672], 'time': [0.8459148053079844, 0.7912218747660518, 0.722223105840385]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8053065286949277
accuracy: 28.89908218383789
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [36.238529205322266, 40.36697006225586, 35.32109832763672, 28.89908218383789], 'time': [0.8459148053079844, 0.7912218747660518, 0.722223105840385, 0.8053065286949277]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.72037463914603
accuracy: 32.11009216308594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [36.238529205322266, 40.36697006225586, 35.32109832763672, 28.89908218383789, 32.11009216308594], 'time': [0.8459148053079844, 0.7912218747660518, 0.722223105840385, 0.8053065286949277, 0.72037463914603]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6962503418326378
accuracy: 47.7064208984375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.7064208984375], 'time': [0.6962503418326378]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7925680708140135
accuracy: 49.08256530761719
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.7064208984375, 49.08256530761719], 'time': [0.6962503418326378, 0.7925680708140135]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8017450291663408
accuracy: 52.75229263305664
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.7064208984375, 49.08256530761719, 52.75229263305664], 'time': [0.6962503418326378, 0.7925680708140135, 0.8017450291663408]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8010375835001469
accuracy: 51.37614440917969
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.7064208984375, 49.08256530761719, 52.75229263305664, 51.37614440917969], 'time': [0.6962503418326378, 0.7925680708140135, 0.8017450291663408, 0.8010375835001469]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7249119905754924
accuracy: 50.45871353149414
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.7064208984375, 49.08256530761719, 52.75229263305664, 51.37614440917969, 50.45871353149414], 'time': [0.6962503418326378, 0.7925680708140135, 0.8017450291663408, 0.8010375835001469, 0.7249119905754924]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.721212494187057
accuracy: 61.92660140991211
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.92660140991211], 'time': [0.721212494187057]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6994175603613257
accuracy: 64.22018432617188
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.92660140991211, 64.22018432617188], 'time': [0.721212494187057, 0.6994175603613257]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8861658209934831
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.92660140991211, 64.22018432617188, 61.46788787841797], 'time': [0.721212494187057, 0.6994175603613257, 0.8861658209934831]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7680291449651122
accuracy: 61.00917053222656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.92660140991211, 64.22018432617188, 61.46788787841797, 61.00917053222656], 'time': [0.721212494187057, 0.6994175603613257, 0.8861658209934831, 0.7680291449651122]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7019276479259133
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.92660140991211, 64.22018432617188, 61.46788787841797, 61.00917053222656, 61.46788787841797], 'time': [0.721212494187057, 0.6994175603613257, 0.8861658209934831, 0.7680291449651122, 0.7019276479259133]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7533304030075669
accuracy: 54.587154388427734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [54.587154388427734], 'time': [0.7533304030075669]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6864041863009334
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [54.587154388427734, 61.46788787841797], 'time': [0.7533304030075669, 0.6864041863009334]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7677303338423371
accuracy: 55.045867919921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [54.587154388427734, 61.46788787841797, 55.045867919921875], 'time': [0.7533304030075669, 0.6864041863009334, 0.7677303338423371]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7287959437817335
accuracy: 55.045867919921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [54.587154388427734, 61.46788787841797, 55.045867919921875, 55.045867919921875], 'time': [0.7533304030075669, 0.6864041863009334, 0.7677303338423371, 0.7287959437817335]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7653872314840555
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [54.587154388427734, 61.46788787841797, 55.045867919921875, 55.045867919921875, 60.55045700073242], 'time': [0.7533304030075669, 0.6864041863009334, 0.7677303338423371, 0.7287959437817335, 0.7653872314840555]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7191882990300655
accuracy: 68.34861755371094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [68.34861755371094], 'time': [0.7191882990300655]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7306984905153513
accuracy: 69.72476959228516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [68.34861755371094, 69.72476959228516], 'time': [0.7191882990300655, 0.7306984905153513]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7051958106458187
accuracy: 71.55963134765625
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [68.34861755371094, 69.72476959228516, 71.55963134765625], 'time': [0.7191882990300655, 0.7306984905153513, 0.7051958106458187]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7407061718404293
accuracy: 71.10091400146484
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [68.34861755371094, 69.72476959228516, 71.55963134765625, 71.10091400146484], 'time': [0.7191882990300655, 0.7306984905153513, 0.7051958106458187, 0.7407061718404293]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8316959552466869
accuracy: 74.77063751220703
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [68.34861755371094, 69.72476959228516, 71.55963134765625, 71.10091400146484, 74.77063751220703], 'time': [0.7191882990300655, 0.7306984905153513, 0.7051958106458187, 0.7407061718404293, 0.8316959552466869]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7457887241616845
accuracy: 74.31192016601562
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [74.31192016601562], 'time': [0.7457887241616845]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6880595460534096
accuracy: 79.35779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [74.31192016601562, 79.35779571533203], 'time': [0.7457887241616845, 0.6880595460534096]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7796782795339823
accuracy: 79.81651306152344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [74.31192016601562, 79.35779571533203, 79.81651306152344], 'time': [0.7457887241616845, 0.6880595460534096, 0.7796782795339823]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7183232204988599
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [74.31192016601562, 79.35779571533203, 79.81651306152344, 78.44036102294922], 'time': [0.7457887241616845, 0.6880595460534096, 0.7796782795339823, 0.7183232204988599]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8342318357899785
accuracy: 79.35779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [74.31192016601562, 79.35779571533203, 79.81651306152344, 78.44036102294922, 79.35779571533203], 'time': [0.7457887241616845, 0.6880595460534096, 0.7796782795339823, 0.7183232204988599, 0.8342318357899785]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7118643065914512
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [60.091739654541016], 'time': [0.7118643065914512]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8008816763758659
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [60.091739654541016, 60.091739654541016], 'time': [0.7118643065914512, 0.8008816763758659]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.743738254532218
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [60.091739654541016, 60.091739654541016, 60.55045700073242], 'time': [0.7118643065914512, 0.8008816763758659, 0.743738254532218]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7183031272143126
accuracy: 57.339447021484375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [60.091739654541016, 60.091739654541016, 60.55045700073242, 57.339447021484375], 'time': [0.7118643065914512, 0.8008816763758659, 0.743738254532218, 0.7183031272143126]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7671509552747011
accuracy: 58.71559524536133
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [60.091739654541016, 60.091739654541016, 60.55045700073242, 57.339447021484375, 58.71559524536133], 'time': [0.7118643065914512, 0.8008816763758659, 0.743738254532218, 0.7183031272143126, 0.7671509552747011]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6875754967331886
accuracy: 70.64219665527344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [70.64219665527344], 'time': [0.6875754967331886]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7969728661701083
accuracy: 70.18348693847656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [70.64219665527344, 70.18348693847656], 'time': [0.6875754967331886, 0.7969728661701083]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7349114902317524
accuracy: 69.26605224609375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [70.64219665527344, 70.18348693847656, 69.26605224609375], 'time': [0.6875754967331886, 0.7969728661701083, 0.7349114902317524]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.74610008392483
accuracy: 70.18348693847656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [70.64219665527344, 70.18348693847656, 69.26605224609375, 70.18348693847656], 'time': [0.6875754967331886, 0.7969728661701083, 0.7349114902317524, 0.74610008392483]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7206169441342354
accuracy: 71.55963134765625
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [70.64219665527344, 70.18348693847656, 69.26605224609375, 70.18348693847656, 71.55963134765625], 'time': [0.6875754967331886, 0.7969728661701083, 0.7349114902317524, 0.74610008392483, 0.7206169441342354]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7582143815234303
accuracy: 76.60549926757812
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.60549926757812], 'time': [0.7582143815234303]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8104252154007554
accuracy: 77.06421661376953
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.60549926757812, 77.06421661376953], 'time': [0.7582143815234303, 0.8104252154007554]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8003936503082514
accuracy: 80.27522277832031
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.60549926757812, 77.06421661376953, 80.27522277832031], 'time': [0.7582143815234303, 0.8104252154007554, 0.8003936503082514]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.753487054258585
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.60549926757812, 77.06421661376953, 80.27522277832031, 78.44036102294922], 'time': [0.7582143815234303, 0.8104252154007554, 0.8003936503082514, 0.753487054258585]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7570742079988122
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.60549926757812, 77.06421661376953, 80.27522277832031, 78.44036102294922, 78.44036102294922], 'time': [0.7582143815234303, 0.8104252154007554, 0.8003936503082514, 0.753487054258585, 0.7570742079988122]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7866384275257587
accuracy: 30.27522850036621
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [30.27522850036621], 'time': [0.7866384275257587]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7683351738378406
accuracy: 31.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [30.27522850036621, 31.65137481689453], 'time': [0.7866384275257587, 0.7683351738378406]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7419484928250313
accuracy: 22.018346786499023
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [30.27522850036621, 31.65137481689453, 22.018346786499023], 'time': [0.7866384275257587, 0.7683351738378406, 0.7419484928250313]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8320014197379351
accuracy: 26.146787643432617
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [30.27522850036621, 31.65137481689453, 22.018346786499023, 26.146787643432617], 'time': [0.7866384275257587, 0.7683351738378406, 0.7419484928250313, 0.8320014197379351]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7495536785572767
accuracy: 38.532108306884766
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [30.27522850036621, 31.65137481689453, 22.018346786499023, 26.146787643432617, 38.532108306884766], 'time': [0.7866384275257587, 0.7683351738378406, 0.7419484928250313, 0.8320014197379351, 0.7495536785572767]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7315201619639993
accuracy: 48.16513442993164
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [48.16513442993164], 'time': [0.7315201619639993]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.766143555752933
accuracy: 49.541282653808594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [48.16513442993164, 49.541282653808594], 'time': [0.7315201619639993, 0.766143555752933]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7018593167886138
accuracy: 41.284400939941406
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [48.16513442993164, 49.541282653808594, 41.284400939941406], 'time': [0.7315201619639993, 0.766143555752933, 0.7018593167886138]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7658257093280554
accuracy: 43.1192626953125
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [48.16513442993164, 49.541282653808594, 41.284400939941406, 43.1192626953125], 'time': [0.7315201619639993, 0.766143555752933, 0.7018593167886138, 0.7658257093280554]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7921066777780652
accuracy: 46.78899002075195
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [48.16513442993164, 49.541282653808594, 41.284400939941406, 43.1192626953125, 46.78899002075195], 'time': [0.7315201619639993, 0.766143555752933, 0.7018593167886138, 0.7658257093280554, 0.7921066777780652]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7844609161838889
accuracy: 54.587154388427734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [54.587154388427734], 'time': [0.7844609161838889]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7226656368002295
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [54.587154388427734, 60.55045700073242], 'time': [0.7844609161838889, 0.7226656368002295]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7634803466498852
accuracy: 55.045867919921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [54.587154388427734, 60.55045700073242, 55.045867919921875], 'time': [0.7844609161838889, 0.7226656368002295, 0.7634803466498852]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7357410956174135
accuracy: 55.045867919921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [54.587154388427734, 60.55045700073242, 55.045867919921875, 55.045867919921875], 'time': [0.7844609161838889, 0.7226656368002295, 0.7634803466498852, 0.7357410956174135]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7368184113875031
accuracy: 61.00917053222656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [54.587154388427734, 60.55045700073242, 55.045867919921875, 55.045867919921875, 61.00917053222656], 'time': [0.7844609161838889, 0.7226656368002295, 0.7634803466498852, 0.7357410956174135, 0.7368184113875031]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.9129869751632214
accuracy: 42.66054916381836
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [42.66054916381836], 'time': [0.9129869751632214]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7445923695340753
accuracy: 31.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [42.66054916381836, 31.65137481689453], 'time': [0.9129869751632214, 0.7445923695340753]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7546292152255774
accuracy: 37.61467742919922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [42.66054916381836, 31.65137481689453, 37.61467742919922], 'time': [0.9129869751632214, 0.7445923695340753, 0.7546292152255774]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6963134929537773
accuracy: 31.192659378051758
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [42.66054916381836, 31.65137481689453, 37.61467742919922, 31.192659378051758], 'time': [0.9129869751632214, 0.7445923695340753, 0.7546292152255774, 0.6963134929537773]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7718422692269087
accuracy: 34.40366744995117
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [42.66054916381836, 31.65137481689453, 37.61467742919922, 31.192659378051758, 34.40366744995117], 'time': [0.9129869751632214, 0.7445923695340753, 0.7546292152255774, 0.6963134929537773, 0.7718422692269087]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8178070681169629
accuracy: 24.311925888061523
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [24.311925888061523], 'time': [0.8178070681169629]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7507715197280049
accuracy: 29.816513061523438
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [24.311925888061523, 29.816513061523438], 'time': [0.8178070681169629, 0.7507715197280049]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7467271275818348
accuracy: 49.541282653808594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [24.311925888061523, 29.816513061523438, 49.541282653808594], 'time': [0.8178070681169629, 0.7507715197280049, 0.7467271275818348]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7458801567554474
accuracy: 51.37614440917969
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [24.311925888061523, 29.816513061523438, 49.541282653808594, 51.37614440917969], 'time': [0.8178070681169629, 0.7507715197280049, 0.7467271275818348, 0.7458801567554474]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7794363908469677
accuracy: 49.541282653808594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [24.311925888061523, 29.816513061523438, 49.541282653808594, 51.37614440917969, 49.541282653808594], 'time': [0.8178070681169629, 0.7507715197280049, 0.7467271275818348, 0.7458801567554474, 0.7794363908469677]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7182529829442501
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.46788787841797], 'time': [0.7182529829442501]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7904986348003149
accuracy: 59.633026123046875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.46788787841797, 59.633026123046875], 'time': [0.7182529829442501, 0.7904986348003149]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.9171953704208136
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.46788787841797, 59.633026123046875, 60.091739654541016], 'time': [0.7182529829442501, 0.7904986348003149, 0.9171953704208136]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7017093114554882
accuracy: 55.96329879760742
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.46788787841797, 59.633026123046875, 60.091739654541016, 55.96329879760742], 'time': [0.7182529829442501, 0.7904986348003149, 0.9171953704208136, 0.7017093114554882]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7435197569429874
accuracy: 59.17430877685547
{'params': {'reservoir_dim': 1000, 'spectral_radius': 0.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.46788787841797, 59.633026123046875, 60.091739654541016, 55.96329879760742, 59.17430877685547], 'time': [0.7182529829442501, 0.7904986348003149, 0.9171953704208136, 0.7017093114554882, 0.7435197569429874]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.745177648961544
accuracy: 68.80733489990234
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [68.80733489990234], 'time': [0.745177648961544]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7803822932764888
accuracy: 71.55963134765625
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [68.80733489990234, 71.55963134765625], 'time': [0.745177648961544, 0.7803822932764888]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7916576704010367
accuracy: 75.68807220458984
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [68.80733489990234, 71.55963134765625, 75.68807220458984], 'time': [0.745177648961544, 0.7803822932764888, 0.7916576704010367]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7280043307691813
accuracy: 72.93577575683594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [68.80733489990234, 71.55963134765625, 75.68807220458984, 72.93577575683594], 'time': [0.745177648961544, 0.7803822932764888, 0.7916576704010367, 0.7280043307691813]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7129823435097933
accuracy: 74.31192016601562
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [68.80733489990234, 71.55963134765625, 75.68807220458984, 72.93577575683594, 74.31192016601562], 'time': [0.745177648961544, 0.7803822932764888, 0.7916576704010367, 0.7280043307691813, 0.7129823435097933]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7761870278045535
accuracy: 79.35779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [79.35779571533203], 'time': [0.7761870278045535]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7415844723582268
accuracy: 80.73394012451172
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [79.35779571533203, 80.73394012451172], 'time': [0.7761870278045535, 0.7415844723582268]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7541085667908192
accuracy: 79.35779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [79.35779571533203, 80.73394012451172, 79.35779571533203], 'time': [0.7761870278045535, 0.7415844723582268, 0.7541085667908192]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7224406767636538
accuracy: 80.73394012451172
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [79.35779571533203, 80.73394012451172, 79.35779571533203, 80.73394012451172], 'time': [0.7761870278045535, 0.7415844723582268, 0.7541085667908192, 0.7224406767636538]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.928277344442904
accuracy: 81.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [79.35779571533203, 80.73394012451172, 79.35779571533203, 80.73394012451172, 81.65137481689453], 'time': [0.7761870278045535, 0.7415844723582268, 0.7541085667908192, 0.7224406767636538, 0.928277344442904]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6915517784655094
accuracy: 80.27522277832031
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [80.27522277832031], 'time': [0.6915517784655094]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7278216313570738
accuracy: 81.19265747070312
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [80.27522277832031, 81.19265747070312], 'time': [0.6915517784655094, 0.7278216313570738]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7167406659573317
accuracy: 82.56880187988281
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [80.27522277832031, 81.19265747070312, 82.56880187988281], 'time': [0.6915517784655094, 0.7278216313570738, 0.7167406659573317]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7477713180705905
accuracy: 81.19265747070312
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [80.27522277832031, 81.19265747070312, 82.56880187988281, 81.19265747070312], 'time': [0.6915517784655094, 0.7278216313570738, 0.7167406659573317, 0.7477713180705905]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7513796370476484
accuracy: 82.1100845336914
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [80.27522277832031, 81.19265747070312, 82.56880187988281, 81.19265747070312, 82.1100845336914], 'time': [0.6915517784655094, 0.7278216313570738, 0.7167406659573317, 0.7477713180705905, 0.7513796370476484]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7801927393302321
accuracy: 74.31192016601562
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [74.31192016601562], 'time': [0.7801927393302321]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7288605896756053
accuracy: 69.26605224609375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [74.31192016601562, 69.26605224609375], 'time': [0.7801927393302321, 0.7288605896756053]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7016483107581735
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [74.31192016601562, 69.26605224609375, 73.39449310302734], 'time': [0.7801927393302321, 0.7288605896756053, 0.7016483107581735]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7409884408116341
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [74.31192016601562, 69.26605224609375, 73.39449310302734, 73.39449310302734], 'time': [0.7801927393302321, 0.7288605896756053, 0.7016483107581735, 0.7409884408116341]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7209612745791674
accuracy: 71.55963134765625
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [74.31192016601562, 69.26605224609375, 73.39449310302734, 73.39449310302734, 71.55963134765625], 'time': [0.7801927393302321, 0.7288605896756053, 0.7016483107581735, 0.7409884408116341, 0.7209612745791674]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7914046794176102
accuracy: 80.27522277832031
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [80.27522277832031], 'time': [0.7914046794176102]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8144623395055532
accuracy: 79.35779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [80.27522277832031, 79.35779571533203], 'time': [0.7914046794176102, 0.8144623395055532]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.757960356771946
accuracy: 81.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [80.27522277832031, 79.35779571533203, 81.65137481689453], 'time': [0.7914046794176102, 0.8144623395055532, 0.757960356771946]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7323985379189253
accuracy: 80.27522277832031
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [80.27522277832031, 79.35779571533203, 81.65137481689453, 80.27522277832031], 'time': [0.7914046794176102, 0.8144623395055532, 0.757960356771946, 0.7323985379189253]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.727204424329102
accuracy: 81.19265747070312
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [80.27522277832031, 79.35779571533203, 81.65137481689453, 80.27522277832031, 81.19265747070312], 'time': [0.7914046794176102, 0.8144623395055532, 0.757960356771946, 0.7323985379189253, 0.727204424329102]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7590111223980784
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922], 'time': [0.7590111223980784]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7479258952662349
accuracy: 79.81651306152344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 79.81651306152344], 'time': [0.7590111223980784, 0.7479258952662349]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7777335001155734
accuracy: 81.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 79.81651306152344, 81.65137481689453], 'time': [0.7590111223980784, 0.7479258952662349, 0.7777335001155734]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7011719094589353
accuracy: 79.81651306152344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 79.81651306152344, 81.65137481689453, 79.81651306152344], 'time': [0.7590111223980784, 0.7479258952662349, 0.7777335001155734, 0.7011719094589353]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7737508397549391
accuracy: 82.56880187988281
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 79.81651306152344, 81.65137481689453, 79.81651306152344, 82.56880187988281], 'time': [0.7590111223980784, 0.7479258952662349, 0.7777335001155734, 0.7011719094589353, 0.7737508397549391]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7881988333538175
accuracy: 32.56880569458008
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [32.56880569458008], 'time': [0.7881988333538175]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7089262269437313
accuracy: 34.86238479614258
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [32.56880569458008, 34.86238479614258], 'time': [0.7881988333538175, 0.7089262269437313]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7714343126863241
accuracy: 38.532108306884766
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [32.56880569458008, 34.86238479614258, 38.532108306884766], 'time': [0.7881988333538175, 0.7089262269437313, 0.7714343126863241]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.883391847833991
accuracy: 38.07339096069336
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [32.56880569458008, 34.86238479614258, 38.532108306884766, 38.07339096069336], 'time': [0.7881988333538175, 0.7089262269437313, 0.7714343126863241, 0.883391847833991]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7024780046194792
accuracy: 36.238529205322266
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [32.56880569458008, 34.86238479614258, 38.532108306884766, 38.07339096069336, 36.238529205322266], 'time': [0.7881988333538175, 0.7089262269437313, 0.7714343126863241, 0.883391847833991, 0.7024780046194792]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7159433187916875
accuracy: 55.045867919921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [55.045867919921875], 'time': [0.7159433187916875]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7442292477935553
accuracy: 63.30274963378906
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [55.045867919921875, 63.30274963378906], 'time': [0.7159433187916875, 0.7442292477935553]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7972024232149124
accuracy: 61.92660140991211
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [55.045867919921875, 63.30274963378906, 61.92660140991211], 'time': [0.7159433187916875, 0.7442292477935553, 0.7972024232149124]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7370504066348076
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [55.045867919921875, 63.30274963378906, 61.92660140991211, 60.55045700073242], 'time': [0.7159433187916875, 0.7442292477935553, 0.7972024232149124, 0.7370504066348076]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7692020023241639
accuracy: 59.633026123046875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [55.045867919921875, 63.30274963378906, 61.92660140991211, 60.55045700073242, 59.633026123046875], 'time': [0.7159433187916875, 0.7442292477935553, 0.7972024232149124, 0.7370504066348076, 0.7692020023241639]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7533281007781625
accuracy: 69.26605224609375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [69.26605224609375], 'time': [0.7533281007781625]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7913756240159273
accuracy: 71.55963134765625
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [69.26605224609375, 71.55963134765625], 'time': [0.7533281007781625, 0.7913756240159273]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7453468609601259
accuracy: 74.77063751220703
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [69.26605224609375, 71.55963134765625, 74.77063751220703], 'time': [0.7533281007781625, 0.7913756240159273, 0.7453468609601259]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7496159598231316
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [69.26605224609375, 71.55963134765625, 74.77063751220703, 73.39449310302734], 'time': [0.7533281007781625, 0.7913756240159273, 0.7453468609601259, 0.7496159598231316]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7835223078727722
accuracy: 75.22935485839844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [69.26605224609375, 71.55963134765625, 74.77063751220703, 73.39449310302734, 75.22935485839844], 'time': [0.7533281007781625, 0.7913756240159273, 0.7453468609601259, 0.7496159598231316, 0.7835223078727722]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8429433610290289
accuracy: 29.357797622680664
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [29.357797622680664], 'time': [0.8429433610290289]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7097019478678703
accuracy: 36.69724655151367
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [29.357797622680664, 36.69724655151367], 'time': [0.8429433610290289, 0.7097019478678703]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7218611165881157
accuracy: 38.99082565307617
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [29.357797622680664, 36.69724655151367, 38.99082565307617], 'time': [0.8429433610290289, 0.7097019478678703, 0.7218611165881157]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7305694464594126
accuracy: 33.486236572265625
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [29.357797622680664, 36.69724655151367, 38.99082565307617, 33.486236572265625], 'time': [0.8429433610290289, 0.7097019478678703, 0.7218611165881157, 0.7305694464594126]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7328732665628195
accuracy: 34.40366744995117
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [29.357797622680664, 36.69724655151367, 38.99082565307617, 33.486236572265625, 34.40366744995117], 'time': [0.8429433610290289, 0.7097019478678703, 0.7218611165881157, 0.7305694464594126, 0.7328732665628195]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8032829659059644
accuracy: 58.71559524536133
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [58.71559524536133], 'time': [0.8032829659059644]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7417053692042828
accuracy: 61.00917053222656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [58.71559524536133, 61.00917053222656], 'time': [0.8032829659059644, 0.7417053692042828]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.743803396821022
accuracy: 62.385318756103516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [58.71559524536133, 61.00917053222656, 62.385318756103516], 'time': [0.8032829659059644, 0.7417053692042828, 0.743803396821022]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7438911776989698
accuracy: 65.59632873535156
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [58.71559524536133, 61.00917053222656, 62.385318756103516, 65.59632873535156], 'time': [0.8032829659059644, 0.7417053692042828, 0.743803396821022, 0.7438911776989698]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.774695448577404
accuracy: 59.17430877685547
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [58.71559524536133, 61.00917053222656, 62.385318756103516, 65.59632873535156, 59.17430877685547], 'time': [0.8032829659059644, 0.7417053692042828, 0.743803396821022, 0.7438911776989698, 0.774695448577404]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7076624752953649
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [73.39449310302734], 'time': [0.7076624752953649]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7649561343714595
accuracy: 69.72476959228516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [73.39449310302734, 69.72476959228516], 'time': [0.7076624752953649, 0.7649561343714595]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8744060341268778
accuracy: 73.85321044921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [73.39449310302734, 69.72476959228516, 73.85321044921875], 'time': [0.7076624752953649, 0.7649561343714595, 0.8744060341268778]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7609644522890449
accuracy: 74.31192016601562
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [73.39449310302734, 69.72476959228516, 73.85321044921875, 74.31192016601562], 'time': [0.7076624752953649, 0.7649561343714595, 0.8744060341268778, 0.7609644522890449]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7504404056817293
accuracy: 75.22935485839844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [73.39449310302734, 69.72476959228516, 73.85321044921875, 74.31192016601562, 75.22935485839844], 'time': [0.7076624752953649, 0.7649561343714595, 0.8744060341268778, 0.7609644522890449, 0.7504404056817293]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7334625208750367
accuracy: 55.50458526611328
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [55.50458526611328], 'time': [0.7334625208750367]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.744260330684483
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [55.50458526611328, 60.091739654541016], 'time': [0.7334625208750367, 0.744260330684483]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7497715502977371
accuracy: 61.00917053222656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [55.50458526611328, 60.091739654541016, 61.00917053222656], 'time': [0.7334625208750367, 0.744260330684483, 0.7497715502977371]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.729614288546145
accuracy: 54.12843704223633
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [55.50458526611328, 60.091739654541016, 61.00917053222656, 54.12843704223633], 'time': [0.7334625208750367, 0.744260330684483, 0.7497715502977371, 0.729614288546145]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7642455473542213
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [55.50458526611328, 60.091739654541016, 61.00917053222656, 54.12843704223633, 60.55045700073242], 'time': [0.7334625208750367, 0.744260330684483, 0.7497715502977371, 0.729614288546145, 0.7642455473542213]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7397000025957823
accuracy: 66.51375579833984
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [66.51375579833984], 'time': [0.7397000025957823]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7464500851929188
accuracy: 69.72476959228516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [66.51375579833984, 69.72476959228516], 'time': [0.7397000025957823, 0.7464500851929188]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7477917680516839
accuracy: 75.22935485839844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [66.51375579833984, 69.72476959228516, 75.22935485839844], 'time': [0.7397000025957823, 0.7464500851929188, 0.7477917680516839]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8124792911112309
accuracy: 72.01834869384766
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [66.51375579833984, 69.72476959228516, 75.22935485839844, 72.01834869384766], 'time': [0.7397000025957823, 0.7464500851929188, 0.7477917680516839, 0.8124792911112309]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8853460969403386
accuracy: 76.60549926757812
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [66.51375579833984, 69.72476959228516, 75.22935485839844, 72.01834869384766, 76.60549926757812], 'time': [0.7397000025957823, 0.7464500851929188, 0.7477917680516839, 0.8124792911112309, 0.8853460969403386]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8089091200381517
accuracy: 76.14678192138672
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672], 'time': [0.8089091200381517]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7061595311388373
accuracy: 76.60549926757812
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 76.60549926757812], 'time': [0.8089091200381517, 0.7061595311388373]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6910055270418525
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 76.60549926757812, 78.44036102294922], 'time': [0.8089091200381517, 0.7061595311388373, 0.6910055270418525]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7158860452473164
accuracy: 81.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 76.60549926757812, 78.44036102294922, 81.65137481689453], 'time': [0.8089091200381517, 0.7061595311388373, 0.6910055270418525, 0.7158860452473164]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8245597342029214
accuracy: 79.35779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 76.60549926757812, 78.44036102294922, 81.65137481689453, 79.35779571533203], 'time': [0.8089091200381517, 0.7061595311388373, 0.6910055270418525, 0.7158860452473164, 0.8245597342029214]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8381864707916975
accuracy: 63.30274963378906
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [63.30274963378906], 'time': [0.8381864707916975]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7658371198922396
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [63.30274963378906, 60.091739654541016], 'time': [0.8381864707916975, 0.7658371198922396]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7766004763543606
accuracy: 59.17430877685547
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [63.30274963378906, 60.091739654541016, 59.17430877685547], 'time': [0.8381864707916975, 0.7658371198922396, 0.7766004763543606]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7272536754608154
accuracy: 56.88072967529297
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [63.30274963378906, 60.091739654541016, 59.17430877685547, 56.88072967529297], 'time': [0.8381864707916975, 0.7658371198922396, 0.7766004763543606, 0.7272536754608154]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7590244989842176
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [63.30274963378906, 60.091739654541016, 59.17430877685547, 56.88072967529297, 60.55045700073242], 'time': [0.8381864707916975, 0.7658371198922396, 0.7766004763543606, 0.7272536754608154, 0.7590244989842176]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7307905443012714
accuracy: 74.77063751220703
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [74.77063751220703], 'time': [0.7307905443012714]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7400896660983562
accuracy: 69.72476959228516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [74.77063751220703, 69.72476959228516], 'time': [0.7307905443012714, 0.7400896660983562]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8775732591748238
accuracy: 67.88990783691406
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [74.77063751220703, 69.72476959228516, 67.88990783691406], 'time': [0.7307905443012714, 0.7400896660983562, 0.8775732591748238]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7089334670454264
accuracy: 68.34861755371094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [74.77063751220703, 69.72476959228516, 67.88990783691406, 68.34861755371094], 'time': [0.7307905443012714, 0.7400896660983562, 0.8775732591748238, 0.7089334670454264]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8419089540839195
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [74.77063751220703, 69.72476959228516, 67.88990783691406, 68.34861755371094, 73.39449310302734], 'time': [0.7307905443012714, 0.7400896660983562, 0.8775732591748238, 0.7089334670454264, 0.8419089540839195]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7489916468039155
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922], 'time': [0.7489916468039155]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7294938750565052
accuracy: 77.98165130615234
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 77.98165130615234], 'time': [0.7489916468039155, 0.7294938750565052]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7675398755818605
accuracy: 78.89907836914062
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 77.98165130615234, 78.89907836914062], 'time': [0.7489916468039155, 0.7294938750565052, 0.7675398755818605]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7370995245873928
accuracy: 78.89907836914062
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 77.98165130615234, 78.89907836914062, 78.89907836914062], 'time': [0.7489916468039155, 0.7294938750565052, 0.7675398755818605, 0.7370995245873928]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7165868040174246
accuracy: 80.27522277832031
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 77.98165130615234, 78.89907836914062, 78.89907836914062, 80.27522277832031], 'time': [0.7489916468039155, 0.7294938750565052, 0.7675398755818605, 0.7370995245873928, 0.7165868040174246]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7961846087127924
accuracy: 28.440364837646484
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [28.440364837646484], 'time': [0.7961846087127924]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7256558239459991
accuracy: 27.064218521118164
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [28.440364837646484, 27.064218521118164], 'time': [0.7961846087127924, 0.7256558239459991]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7485277159139514
accuracy: 26.60550308227539
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [28.440364837646484, 27.064218521118164, 26.60550308227539], 'time': [0.7961846087127924, 0.7256558239459991, 0.7485277159139514]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7829210180789232
accuracy: 22.47706413269043
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [28.440364837646484, 27.064218521118164, 26.60550308227539, 22.47706413269043], 'time': [0.7961846087127924, 0.7256558239459991, 0.7485277159139514, 0.7829210180789232]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8281316831707954
accuracy: 27.98164939880371
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [28.440364837646484, 27.064218521118164, 26.60550308227539, 22.47706413269043, 27.98164939880371], 'time': [0.7961846087127924, 0.7256558239459991, 0.7485277159139514, 0.7829210180789232, 0.8281316831707954]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7900051614269614
accuracy: 47.7064208984375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.7064208984375], 'time': [0.7900051614269614]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7308422764763236
accuracy: 45.871559143066406
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.7064208984375, 45.871559143066406], 'time': [0.7900051614269614, 0.7308422764763236]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8129037991166115
accuracy: 46.33027267456055
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.7064208984375, 45.871559143066406, 46.33027267456055], 'time': [0.7900051614269614, 0.7308422764763236, 0.8129037991166115]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7319534569978714
accuracy: 35.32109832763672
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.7064208984375, 45.871559143066406, 46.33027267456055, 35.32109832763672], 'time': [0.7900051614269614, 0.7308422764763236, 0.8129037991166115, 0.7319534569978714]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7633822076022625
accuracy: 48.16513442993164
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.7064208984375, 45.871559143066406, 46.33027267456055, 35.32109832763672, 48.16513442993164], 'time': [0.7900051614269614, 0.7308422764763236, 0.8129037991166115, 0.7319534569978714, 0.7633822076022625]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7142565343528986
accuracy: 55.50458526611328
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [55.50458526611328], 'time': [0.7142565343528986]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7129513025283813
accuracy: 59.633026123046875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [55.50458526611328, 59.633026123046875], 'time': [0.7142565343528986, 0.7129513025283813]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7323649944737554
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [55.50458526611328, 59.633026123046875, 60.091739654541016], 'time': [0.7142565343528986, 0.7129513025283813, 0.7323649944737554]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6942977989092469
accuracy: 54.12843704223633
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [55.50458526611328, 59.633026123046875, 60.091739654541016, 54.12843704223633], 'time': [0.7142565343528986, 0.7129513025283813, 0.7323649944737554, 0.6942977989092469]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7222524993121624
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [55.50458526611328, 59.633026123046875, 60.091739654541016, 54.12843704223633, 60.55045700073242], 'time': [0.7142565343528986, 0.7129513025283813, 0.7323649944737554, 0.6942977989092469, 0.7222524993121624]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7887531332671642
accuracy: 27.064218521118164
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [27.064218521118164], 'time': [0.7887531332671642]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7101033376529813
accuracy: 27.064218521118164
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [27.064218521118164, 27.064218521118164], 'time': [0.7887531332671642, 0.7101033376529813]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7728481739759445
accuracy: 33.027523040771484
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [27.064218521118164, 27.064218521118164, 33.027523040771484], 'time': [0.7887531332671642, 0.7101033376529813, 0.7728481739759445]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7095960127189755
accuracy: 27.064218521118164
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [27.064218521118164, 27.064218521118164, 33.027523040771484, 27.064218521118164], 'time': [0.7887531332671642, 0.7101033376529813, 0.7728481739759445, 0.7095960127189755]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7053687740117311
accuracy: 29.816513061523438
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [27.064218521118164, 27.064218521118164, 33.027523040771484, 27.064218521118164, 29.816513061523438], 'time': [0.7887531332671642, 0.7101033376529813, 0.7728481739759445, 0.7095960127189755, 0.7053687740117311]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7480290811508894
accuracy: 48.62385177612305
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [48.62385177612305], 'time': [0.7480290811508894]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7430608002468944
accuracy: 45.412841796875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [48.62385177612305, 45.412841796875], 'time': [0.7480290811508894, 0.7430608002468944]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7169898897409439
accuracy: 50.91743087768555
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [48.62385177612305, 45.412841796875, 50.91743087768555], 'time': [0.7480290811508894, 0.7430608002468944, 0.7169898897409439]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7275663707405329
accuracy: 44.95412826538086
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [48.62385177612305, 45.412841796875, 50.91743087768555, 44.95412826538086], 'time': [0.7480290811508894, 0.7430608002468944, 0.7169898897409439, 0.7275663707405329]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7270540138706565
accuracy: 48.62385177612305
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [48.62385177612305, 45.412841796875, 50.91743087768555, 44.95412826538086, 48.62385177612305], 'time': [0.7480290811508894, 0.7430608002468944, 0.7169898897409439, 0.7275663707405329, 0.7270540138706565]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.712559649720788
accuracy: 61.92660140991211
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.92660140991211], 'time': [0.712559649720788]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7326599340885878
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.92660140991211, 60.55045700073242], 'time': [0.712559649720788, 0.7326599340885878]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.710632236674428
accuracy: 59.633026123046875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.92660140991211, 60.55045700073242, 59.633026123046875], 'time': [0.712559649720788, 0.7326599340885878, 0.710632236674428]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8376969499513507
accuracy: 56.42201614379883
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.92660140991211, 60.55045700073242, 59.633026123046875, 56.42201614379883], 'time': [0.712559649720788, 0.7326599340885878, 0.710632236674428, 0.8376969499513507]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8169092321768403
accuracy: 61.00917053222656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [61.92660140991211, 60.55045700073242, 59.633026123046875, 56.42201614379883, 61.00917053222656], 'time': [0.712559649720788, 0.7326599340885878, 0.710632236674428, 0.8376969499513507, 0.8169092321768403]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7107002092525363
accuracy: 55.96329879760742
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [55.96329879760742], 'time': [0.7107002092525363]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.757636115886271
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [55.96329879760742, 60.091739654541016], 'time': [0.7107002092525363, 0.757636115886271]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7669912772253156
accuracy: 58.25687789916992
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [55.96329879760742, 60.091739654541016, 58.25687789916992], 'time': [0.7107002092525363, 0.757636115886271, 0.7669912772253156]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7653847439214587
accuracy: 53.66972351074219
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [55.96329879760742, 60.091739654541016, 58.25687789916992, 53.66972351074219], 'time': [0.7107002092525363, 0.757636115886271, 0.7669912772253156, 0.7653847439214587]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7755864085629582
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [55.96329879760742, 60.091739654541016, 58.25687789916992, 53.66972351074219, 60.55045700073242], 'time': [0.7107002092525363, 0.757636115886271, 0.7669912772253156, 0.7653847439214587, 0.7755864085629582]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7546426188200712
accuracy: 66.51375579833984
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [66.51375579833984], 'time': [0.7546426188200712]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7417433643713593
accuracy: 69.26605224609375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [66.51375579833984, 69.26605224609375], 'time': [0.7546426188200712, 0.7417433643713593]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7734166327863932
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [66.51375579833984, 69.26605224609375, 73.39449310302734], 'time': [0.7546426188200712, 0.7417433643713593, 0.7734166327863932]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7440666975453496
accuracy: 72.01834869384766
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [66.51375579833984, 69.26605224609375, 73.39449310302734, 72.01834869384766], 'time': [0.7546426188200712, 0.7417433643713593, 0.7734166327863932, 0.7440666975453496]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7823817599564791
accuracy: 73.85321044921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [66.51375579833984, 69.26605224609375, 73.39449310302734, 72.01834869384766, 73.85321044921875], 'time': [0.7546426188200712, 0.7417433643713593, 0.7734166327863932, 0.7440666975453496, 0.7823817599564791]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.9008931079879403
accuracy: 74.31192016601562
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [74.31192016601562], 'time': [0.9008931079879403]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7624903507530689
accuracy: 76.60549926757812
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [74.31192016601562, 76.60549926757812], 'time': [0.9008931079879403, 0.7624903507530689]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7397908736020327
accuracy: 76.60549926757812
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [74.31192016601562, 76.60549926757812, 76.60549926757812], 'time': [0.9008931079879403, 0.7624903507530689, 0.7397908736020327]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7662565093487501
accuracy: 80.27522277832031
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [74.31192016601562, 76.60549926757812, 76.60549926757812, 80.27522277832031], 'time': [0.9008931079879403, 0.7624903507530689, 0.7397908736020327, 0.7662565093487501]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7505855513736606
accuracy: 77.98165130615234
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [74.31192016601562, 76.60549926757812, 76.60549926757812, 80.27522277832031, 77.98165130615234], 'time': [0.9008931079879403, 0.7624903507530689, 0.7397908736020327, 0.7662565093487501, 0.7505855513736606]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7674235999584198
accuracy: 63.76146697998047
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [63.76146697998047], 'time': [0.7674235999584198]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.767792378552258
accuracy: 55.96329879760742
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [63.76146697998047, 55.96329879760742], 'time': [0.7674235999584198, 0.767792378552258]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7451391331851482
accuracy: 55.045867919921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [63.76146697998047, 55.96329879760742, 55.045867919921875], 'time': [0.7674235999584198, 0.767792378552258, 0.7451391331851482]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7350204102694988
accuracy: 55.96329879760742
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [63.76146697998047, 55.96329879760742, 55.045867919921875, 55.96329879760742], 'time': [0.7674235999584198, 0.767792378552258, 0.7451391331851482, 0.7350204102694988]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7539409128949046
accuracy: 61.00917053222656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [63.76146697998047, 55.96329879760742, 55.045867919921875, 55.96329879760742, 61.00917053222656], 'time': [0.7674235999584198, 0.767792378552258, 0.7451391331851482, 0.7350204102694988, 0.7539409128949046]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7563451640307903
accuracy: 75.22935485839844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [75.22935485839844], 'time': [0.7563451640307903]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7852337695658207
accuracy: 70.64219665527344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [75.22935485839844, 70.64219665527344], 'time': [0.7563451640307903, 0.7852337695658207]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.764726473018527
accuracy: 66.97247314453125
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [75.22935485839844, 70.64219665527344, 66.97247314453125], 'time': [0.7563451640307903, 0.7852337695658207, 0.764726473018527]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8283493919298053
accuracy: 69.26605224609375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [75.22935485839844, 70.64219665527344, 66.97247314453125, 69.26605224609375], 'time': [0.7563451640307903, 0.7852337695658207, 0.764726473018527, 0.8283493919298053]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7426881296560168
accuracy: 70.64219665527344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [75.22935485839844, 70.64219665527344, 66.97247314453125, 69.26605224609375, 70.64219665527344], 'time': [0.7563451640307903, 0.7852337695658207, 0.764726473018527, 0.8283493919298053, 0.7426881296560168]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.720334829762578
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922], 'time': [0.720334829762578]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7649507028982043
accuracy: 78.89907836914062
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 78.89907836914062], 'time': [0.720334829762578, 0.7649507028982043]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7569811278954148
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 78.89907836914062, 78.44036102294922], 'time': [0.720334829762578, 0.7649507028982043, 0.7569811278954148]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7275939946994185
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 78.89907836914062, 78.44036102294922, 78.44036102294922], 'time': [0.720334829762578, 0.7649507028982043, 0.7569811278954148, 0.7275939946994185]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7099878154695034
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [78.44036102294922, 78.89907836914062, 78.44036102294922, 78.44036102294922, 78.44036102294922], 'time': [0.720334829762578, 0.7649507028982043, 0.7569811278954148, 0.7275939946994185, 0.7099878154695034]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7192351957783103
accuracy: 28.89908218383789
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [28.89908218383789], 'time': [0.7192351957783103]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7351979054510593
accuracy: 30.27522850036621
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [28.89908218383789, 30.27522850036621], 'time': [0.7192351957783103, 0.7351979054510593]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7818495268002152
accuracy: 30.733943939208984
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [28.89908218383789, 30.27522850036621, 30.733943939208984], 'time': [0.7192351957783103, 0.7351979054510593, 0.7818495268002152]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8035246999934316
accuracy: 27.064218521118164
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [28.89908218383789, 30.27522850036621, 30.733943939208984, 27.064218521118164], 'time': [0.7192351957783103, 0.7351979054510593, 0.7818495268002152, 0.8035246999934316]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7377652740105987
accuracy: 28.89908218383789
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [28.89908218383789, 30.27522850036621, 30.733943939208984, 27.064218521118164, 28.89908218383789], 'time': [0.7192351957783103, 0.7351979054510593, 0.7818495268002152, 0.8035246999934316, 0.7377652740105987]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7085015652701259
accuracy: 40.36697006225586
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [40.36697006225586], 'time': [0.7085015652701259]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7760724863037467
accuracy: 48.62385177612305
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [40.36697006225586, 48.62385177612305], 'time': [0.7085015652701259, 0.7760724863037467]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7815657136961818
accuracy: 43.1192626953125
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [40.36697006225586, 48.62385177612305, 43.1192626953125], 'time': [0.7085015652701259, 0.7760724863037467, 0.7815657136961818]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7101980857551098
accuracy: 28.440364837646484
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [40.36697006225586, 48.62385177612305, 43.1192626953125, 28.440364837646484], 'time': [0.7085015652701259, 0.7760724863037467, 0.7815657136961818, 0.7101980857551098]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7294072257354856
accuracy: 45.412841796875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [40.36697006225586, 48.62385177612305, 43.1192626953125, 28.440364837646484, 45.412841796875], 'time': [0.7085015652701259, 0.7760724863037467, 0.7815657136961818, 0.7101980857551098, 0.7294072257354856]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7750604413449764
accuracy: 56.42201614379883
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [56.42201614379883], 'time': [0.7750604413449764]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.732301308773458
accuracy: 59.17430877685547
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [56.42201614379883, 59.17430877685547], 'time': [0.7750604413449764, 0.732301308773458]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7220716020092368
accuracy: 58.71559524536133
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [56.42201614379883, 59.17430877685547, 58.71559524536133], 'time': [0.7750604413449764, 0.732301308773458, 0.7220716020092368]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7090091900900006
accuracy: 51.37614440917969
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [56.42201614379883, 59.17430877685547, 58.71559524536133, 51.37614440917969], 'time': [0.7750604413449764, 0.732301308773458, 0.7220716020092368, 0.7090091900900006]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7225303035229445
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [56.42201614379883, 59.17430877685547, 58.71559524536133, 51.37614440917969, 61.46788787841797], 'time': [0.7750604413449764, 0.732301308773458, 0.7220716020092368, 0.7090091900900006, 0.7225303035229445]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7215317152440548
accuracy: 18.80733871459961
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [18.80733871459961], 'time': [0.7215317152440548]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7845927970483899
accuracy: 17.889907836914062
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [18.80733871459961, 17.889907836914062], 'time': [0.7215317152440548, 0.7845927970483899]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8200635816901922
accuracy: 30.733943939208984
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [18.80733871459961, 17.889907836914062, 30.733943939208984], 'time': [0.7215317152440548, 0.7845927970483899, 0.8200635816901922]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7741238186135888
accuracy: 27.064218521118164
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [18.80733871459961, 17.889907836914062, 30.733943939208984, 27.064218521118164], 'time': [0.7215317152440548, 0.7845927970483899, 0.8200635816901922, 0.7741238186135888]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7018783334642649
accuracy: 24.770641326904297
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [18.80733871459961, 17.889907836914062, 30.733943939208984, 27.064218521118164, 24.770641326904297], 'time': [0.7215317152440548, 0.7845927970483899, 0.8200635816901922, 0.7741238186135888, 0.7018783334642649]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8400796735659242
accuracy: 47.247703552246094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.247703552246094], 'time': [0.8400796735659242]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7120823208242655
accuracy: 43.1192626953125
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.247703552246094, 43.1192626953125], 'time': [0.8400796735659242, 0.7120823208242655]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7609143173322082
accuracy: 51.834861755371094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.247703552246094, 43.1192626953125, 51.834861755371094], 'time': [0.8400796735659242, 0.7120823208242655, 0.7609143173322082]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8003588579595089
accuracy: 40.825687408447266
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.247703552246094, 43.1192626953125, 51.834861755371094, 40.825687408447266], 'time': [0.8400796735659242, 0.7120823208242655, 0.7609143173322082, 0.8003588579595089]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7599322469905019
accuracy: 49.541282653808594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [47.247703552246094, 43.1192626953125, 51.834861755371094, 40.825687408447266, 49.541282653808594], 'time': [0.8400796735659242, 0.7120823208242655, 0.7609143173322082, 0.8003588579595089, 0.7599322469905019]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7914504176005721
accuracy: 65.13761138916016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [65.13761138916016], 'time': [0.7914504176005721]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7917832788079977
accuracy: 56.42201614379883
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [65.13761138916016, 56.42201614379883], 'time': [0.7914504176005721, 0.7917832788079977]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7789273802191019
accuracy: 54.12843704223633
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [65.13761138916016, 56.42201614379883, 54.12843704223633], 'time': [0.7914504176005721, 0.7917832788079977, 0.7789273802191019]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7056914167478681
accuracy: 55.50458526611328
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [65.13761138916016, 56.42201614379883, 54.12843704223633, 55.50458526611328], 'time': [0.7914504176005721, 0.7917832788079977, 0.7789273802191019, 0.7056914167478681]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8628309927880764
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.0, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [65.13761138916016, 56.42201614379883, 54.12843704223633, 55.50458526611328, 60.091739654541016], 'time': [0.7914504176005721, 0.7917832788079977, 0.7789273802191019, 0.7056914167478681, 0.8628309927880764]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7462826734408736
accuracy: 71.10091400146484
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [71.10091400146484], 'time': [0.7462826734408736]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6992839220911264
accuracy: 72.93577575683594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [71.10091400146484, 72.93577575683594], 'time': [0.7462826734408736, 0.6992839220911264]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7646783199161291
accuracy: 69.72476959228516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [71.10091400146484, 72.93577575683594, 69.72476959228516], 'time': [0.7462826734408736, 0.6992839220911264, 0.7646783199161291]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7820491092279553
accuracy: 75.68807220458984
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [71.10091400146484, 72.93577575683594, 69.72476959228516, 75.68807220458984], 'time': [0.7462826734408736, 0.6992839220911264, 0.7646783199161291, 0.7820491092279553]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8088662791997194
accuracy: 74.31192016601562
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [71.10091400146484, 72.93577575683594, 69.72476959228516, 75.68807220458984, 74.31192016601562], 'time': [0.7462826734408736, 0.6992839220911264, 0.7646783199161291, 0.7820491092279553, 0.8088662791997194]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7398515697568655
accuracy: 79.35779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [79.35779571533203], 'time': [0.7398515697568655]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6864211428910494
accuracy: 75.68807220458984
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [79.35779571533203, 75.68807220458984], 'time': [0.7398515697568655, 0.6864211428910494]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6811584439128637
accuracy: 77.98165130615234
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [79.35779571533203, 75.68807220458984, 77.98165130615234], 'time': [0.7398515697568655, 0.6864211428910494, 0.6811584439128637]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7588078565895557
accuracy: 79.35779571533203
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [79.35779571533203, 75.68807220458984, 77.98165130615234, 79.35779571533203], 'time': [0.7398515697568655, 0.6864211428910494, 0.6811584439128637, 0.7588078565895557]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7651811661198735
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [79.35779571533203, 75.68807220458984, 77.98165130615234, 79.35779571533203, 78.44036102294922], 'time': [0.7398515697568655, 0.6864211428910494, 0.6811584439128637, 0.7588078565895557, 0.7651811661198735]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7185257421806455
accuracy: 82.1100845336914
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [82.1100845336914], 'time': [0.7185257421806455]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.776821231469512
accuracy: 80.73394012451172
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [82.1100845336914, 80.73394012451172], 'time': [0.7185257421806455, 0.776821231469512]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6976365083828568
accuracy: 80.73394012451172
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [82.1100845336914, 80.73394012451172, 80.73394012451172], 'time': [0.7185257421806455, 0.776821231469512, 0.6976365083828568]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7799787418916821
accuracy: 82.56880187988281
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [82.1100845336914, 80.73394012451172, 80.73394012451172, 82.56880187988281], 'time': [0.7185257421806455, 0.776821231469512, 0.6976365083828568, 0.7799787418916821]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7892516609281301
accuracy: 82.56880187988281
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [82.1100845336914, 80.73394012451172, 80.73394012451172, 82.56880187988281, 82.56880187988281], 'time': [0.7185257421806455, 0.776821231469512, 0.6976365083828568, 0.7799787418916821, 0.7892516609281301]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7744314828887582
accuracy: 67.88990783691406
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [67.88990783691406], 'time': [0.7744314828887582]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7507335338741541
accuracy: 69.26605224609375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [67.88990783691406, 69.26605224609375], 'time': [0.7744314828887582, 0.7507335338741541]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7064444534480572
accuracy: 75.22935485839844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [67.88990783691406, 69.26605224609375, 75.22935485839844], 'time': [0.7744314828887582, 0.7507335338741541, 0.7064444534480572]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.778078175149858
accuracy: 74.77063751220703
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [67.88990783691406, 69.26605224609375, 75.22935485839844, 74.77063751220703], 'time': [0.7744314828887582, 0.7507335338741541, 0.7064444534480572, 0.778078175149858]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7239864589646459
accuracy: 76.60549926757812
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [67.88990783691406, 69.26605224609375, 75.22935485839844, 74.77063751220703, 76.60549926757812], 'time': [0.7744314828887582, 0.7507335338741541, 0.7064444534480572, 0.778078175149858, 0.7239864589646459]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7522653611376882
accuracy: 77.98165130615234
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [77.98165130615234], 'time': [0.7522653611376882]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7558674123138189
accuracy: 78.89907836914062
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [77.98165130615234, 78.89907836914062], 'time': [0.7522653611376882, 0.7558674123138189]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7410754002630711
accuracy: 80.73394012451172
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [77.98165130615234, 78.89907836914062, 80.73394012451172], 'time': [0.7522653611376882, 0.7558674123138189, 0.7410754002630711]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7932197283953428
accuracy: 77.06421661376953
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [77.98165130615234, 78.89907836914062, 80.73394012451172, 77.06421661376953], 'time': [0.7522653611376882, 0.7558674123138189, 0.7410754002630711, 0.7932197283953428]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7539164703339338
accuracy: 80.27522277832031
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [77.98165130615234, 78.89907836914062, 80.73394012451172, 77.06421661376953, 80.27522277832031], 'time': [0.7522653611376882, 0.7558674123138189, 0.7410754002630711, 0.7932197283953428, 0.7539164703339338]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7127028992399573
accuracy: 80.27522277832031
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [80.27522277832031], 'time': [0.7127028992399573]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7706176498904824
accuracy: 79.81651306152344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [80.27522277832031, 79.81651306152344], 'time': [0.7127028992399573, 0.7706176498904824]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7755268355831504
accuracy: 80.73394012451172
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [80.27522277832031, 79.81651306152344, 80.73394012451172], 'time': [0.7127028992399573, 0.7706176498904824, 0.7755268355831504]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.728757806122303
accuracy: 81.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [80.27522277832031, 79.81651306152344, 80.73394012451172, 81.65137481689453], 'time': [0.7127028992399573, 0.7706176498904824, 0.7755268355831504, 0.728757806122303]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.728295074775815
accuracy: 81.65137481689453
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [80.27522277832031, 79.81651306152344, 80.73394012451172, 81.65137481689453, 81.65137481689453], 'time': [0.7127028992399573, 0.7706176498904824, 0.7755268355831504, 0.728757806122303, 0.728295074775815]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6978537049144506
accuracy: 51.37614440917969
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [51.37614440917969], 'time': [0.6978537049144506]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7620372483506799
accuracy: 46.78899002075195
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [51.37614440917969, 46.78899002075195], 'time': [0.6978537049144506, 0.7620372483506799]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7209999626502395
accuracy: 44.95412826538086
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [51.37614440917969, 46.78899002075195, 44.95412826538086], 'time': [0.6978537049144506, 0.7620372483506799, 0.7209999626502395]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7098022038117051
accuracy: 49.541282653808594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [51.37614440917969, 46.78899002075195, 44.95412826538086, 49.541282653808594], 'time': [0.6978537049144506, 0.7620372483506799, 0.7209999626502395, 0.7098022038117051]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7348806280642748
accuracy: 48.16513442993164
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [51.37614440917969, 46.78899002075195, 44.95412826538086, 49.541282653808594, 48.16513442993164], 'time': [0.6978537049144506, 0.7620372483506799, 0.7209999626502395, 0.7098022038117051, 0.7348806280642748]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8080277489498258
accuracy: 54.587154388427734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [54.587154388427734], 'time': [0.8080277489498258]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7029081471264362
accuracy: 54.587154388427734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [54.587154388427734, 54.587154388427734], 'time': [0.8080277489498258, 0.7029081471264362]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8307297518476844
accuracy: 54.587154388427734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [54.587154388427734, 54.587154388427734, 54.587154388427734], 'time': [0.8080277489498258, 0.7029081471264362, 0.8307297518476844]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6922641014680266
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [54.587154388427734, 54.587154388427734, 54.587154388427734, 61.46788787841797], 'time': [0.8080277489498258, 0.7029081471264362, 0.8307297518476844, 0.6922641014680266]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7345471605658531
accuracy: 64.22018432617188
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [54.587154388427734, 54.587154388427734, 54.587154388427734, 61.46788787841797, 64.22018432617188], 'time': [0.8080277489498258, 0.7029081471264362, 0.8307297518476844, 0.6922641014680266, 0.7345471605658531]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.763525728136301
accuracy: 68.34861755371094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [68.34861755371094], 'time': [0.763525728136301]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.763863799162209
accuracy: 69.26605224609375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [68.34861755371094, 69.26605224609375], 'time': [0.763525728136301, 0.763863799162209]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7138637313619256
accuracy: 72.01834869384766
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [68.34861755371094, 69.26605224609375, 72.01834869384766], 'time': [0.763525728136301, 0.763863799162209, 0.7138637313619256]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8200020575895905
accuracy: 71.10091400146484
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [68.34861755371094, 69.26605224609375, 72.01834869384766, 71.10091400146484], 'time': [0.763525728136301, 0.763863799162209, 0.7138637313619256, 0.8200020575895905]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7573280418291688
accuracy: 78.44036102294922
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [68.34861755371094, 69.26605224609375, 72.01834869384766, 71.10091400146484, 78.44036102294922], 'time': [0.763525728136301, 0.763863799162209, 0.7138637313619256, 0.8200020575895905, 0.7573280418291688]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7479517981410027
accuracy: 46.78899002075195
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [46.78899002075195], 'time': [0.7479517981410027]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7873763050884008
accuracy: 45.871559143066406
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [46.78899002075195, 45.871559143066406], 'time': [0.7479517981410027, 0.7873763050884008]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7433281932026148
accuracy: 45.871559143066406
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [46.78899002075195, 45.871559143066406, 45.871559143066406], 'time': [0.7479517981410027, 0.7873763050884008, 0.7433281932026148]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8622826961800456
accuracy: 49.541282653808594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [46.78899002075195, 45.871559143066406, 45.871559143066406, 49.541282653808594], 'time': [0.7479517981410027, 0.7873763050884008, 0.7433281932026148, 0.8622826961800456]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7035407545045018
accuracy: 45.871559143066406
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [46.78899002075195, 45.871559143066406, 45.871559143066406, 49.541282653808594, 45.871559143066406], 'time': [0.7479517981410027, 0.7873763050884008, 0.7433281932026148, 0.8622826961800456, 0.7035407545045018]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7635846808552742
accuracy: 54.587154388427734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [54.587154388427734], 'time': [0.7635846808552742]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.745001352392137
accuracy: 53.66972351074219
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [54.587154388427734, 53.66972351074219], 'time': [0.7635846808552742, 0.745001352392137]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7194110611453652
accuracy: 62.385318756103516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [54.587154388427734, 53.66972351074219, 62.385318756103516], 'time': [0.7635846808552742, 0.745001352392137, 0.7194110611453652]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7506937198340893
accuracy: 61.92660140991211
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [54.587154388427734, 53.66972351074219, 62.385318756103516, 61.92660140991211], 'time': [0.7635846808552742, 0.745001352392137, 0.7194110611453652, 0.7506937198340893]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7248701686039567
accuracy: 55.50458526611328
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [54.587154388427734, 53.66972351074219, 62.385318756103516, 61.92660140991211, 55.50458526611328], 'time': [0.7635846808552742, 0.745001352392137, 0.7194110611453652, 0.7506937198340893, 0.7248701686039567]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7711504297330976
accuracy: 71.55963134765625
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [71.55963134765625], 'time': [0.7711504297330976]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7038641134276986
accuracy: 69.72476959228516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [71.55963134765625, 69.72476959228516], 'time': [0.7711504297330976, 0.7038641134276986]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7918552039191127
accuracy: 75.22935485839844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [71.55963134765625, 69.72476959228516, 75.22935485839844], 'time': [0.7711504297330976, 0.7038641134276986, 0.7918552039191127]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7390338107943535
accuracy: 74.31192016601562
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [71.55963134765625, 69.72476959228516, 75.22935485839844, 74.31192016601562], 'time': [0.7711504297330976, 0.7038641134276986, 0.7918552039191127, 0.7390338107943535]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7051882436499
accuracy: 74.77063751220703
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.1, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [71.55963134765625, 69.72476959228516, 75.22935485839844, 74.31192016601562, 74.77063751220703], 'time': [0.7711504297330976, 0.7038641134276986, 0.7918552039191127, 0.7390338107943535, 0.7051882436499]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7077665170654655
accuracy: 66.51375579833984
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [66.51375579833984], 'time': [0.7077665170654655]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8469348279759288
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [66.51375579833984, 60.091739654541016], 'time': [0.7077665170654655, 0.8469348279759288]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7901794202625751
accuracy: 62.385318756103516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [66.51375579833984, 60.091739654541016, 62.385318756103516], 'time': [0.7077665170654655, 0.8469348279759288, 0.7901794202625751]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7472472479566932
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [66.51375579833984, 60.091739654541016, 62.385318756103516, 61.46788787841797], 'time': [0.7077665170654655, 0.8469348279759288, 0.7901794202625751, 0.7472472479566932]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.708527933806181
accuracy: 61.92660140991211
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [66.51375579833984, 60.091739654541016, 62.385318756103516, 61.46788787841797, 61.92660140991211], 'time': [0.7077665170654655, 0.8469348279759288, 0.7901794202625751, 0.7472472479566932, 0.708527933806181]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7049868162721395
accuracy: 72.01834869384766
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [72.01834869384766], 'time': [0.7049868162721395]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8033284451812506
accuracy: 70.18348693847656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [72.01834869384766, 70.18348693847656], 'time': [0.7049868162721395, 0.8033284451812506]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7428996684029698
accuracy: 69.72476959228516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [72.01834869384766, 70.18348693847656, 69.72476959228516], 'time': [0.7049868162721395, 0.8033284451812506, 0.7428996684029698]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7687972690910101
accuracy: 70.18348693847656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [72.01834869384766, 70.18348693847656, 69.72476959228516, 70.18348693847656], 'time': [0.7049868162721395, 0.8033284451812506, 0.7428996684029698, 0.7687972690910101]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7644395399838686
accuracy: 70.18348693847656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [72.01834869384766, 70.18348693847656, 69.72476959228516, 70.18348693847656, 70.18348693847656], 'time': [0.7049868162721395, 0.8033284451812506, 0.7428996684029698, 0.7687972690910101, 0.7644395399838686]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6895883632823825
accuracy: 76.14678192138672
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672], 'time': [0.6895883632823825]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.737330892123282
accuracy: 75.68807220458984
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 75.68807220458984], 'time': [0.6895883632823825, 0.737330892123282]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7560845520347357
accuracy: 76.60549926757812
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 75.68807220458984, 76.60549926757812], 'time': [0.6895883632823825, 0.737330892123282, 0.7560845520347357]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7160661229863763
accuracy: 77.06421661376953
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 75.68807220458984, 76.60549926757812, 77.06421661376953], 'time': [0.6895883632823825, 0.737330892123282, 0.7560845520347357, 0.7160661229863763]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8812878672033548
accuracy: 77.06421661376953
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [76.14678192138672, 75.68807220458984, 76.60549926757812, 77.06421661376953, 77.06421661376953], 'time': [0.6895883632823825, 0.737330892123282, 0.7560845520347357, 0.7160661229863763, 0.8812878672033548]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6804515710100532
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797], 'time': [0.6804515710100532]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.770328663289547
accuracy: 57.339447021484375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 57.339447021484375], 'time': [0.6804515710100532, 0.770328663289547]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8078582873567939
accuracy: 59.17430877685547
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 57.339447021484375, 59.17430877685547], 'time': [0.6804515710100532, 0.770328663289547, 0.8078582873567939]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.710809325799346
accuracy: 58.71559524536133
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 57.339447021484375, 59.17430877685547, 58.71559524536133], 'time': [0.6804515710100532, 0.770328663289547, 0.8078582873567939, 0.710809325799346]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6992192026227713
accuracy: 65.13761138916016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 57.339447021484375, 59.17430877685547, 58.71559524536133, 65.13761138916016], 'time': [0.6804515710100532, 0.770328663289547, 0.8078582873567939, 0.710809325799346, 0.6992192026227713]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6889346782118082
accuracy: 63.30274963378906
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [63.30274963378906], 'time': [0.6889346782118082]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.698128622956574
accuracy: 71.55963134765625
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [63.30274963378906, 71.55963134765625], 'time': [0.6889346782118082, 0.698128622956574]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7274452932178974
accuracy: 67.88990783691406
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [63.30274963378906, 71.55963134765625, 67.88990783691406], 'time': [0.6889346782118082, 0.698128622956574, 0.7274452932178974]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7881485754624009
accuracy: 67.43119049072266
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [63.30274963378906, 71.55963134765625, 67.88990783691406, 67.43119049072266], 'time': [0.6889346782118082, 0.698128622956574, 0.7274452932178974, 0.7881485754624009]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7022469565272331
accuracy: 66.51375579833984
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [63.30274963378906, 71.55963134765625, 67.88990783691406, 67.43119049072266, 66.51375579833984], 'time': [0.6889346782118082, 0.698128622956574, 0.7274452932178974, 0.7881485754624009, 0.7022469565272331]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6899666525423527
accuracy: 73.39449310302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [73.39449310302734], 'time': [0.6899666525423527]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8150058528408408
accuracy: 77.06421661376953
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [73.39449310302734, 77.06421661376953], 'time': [0.6899666525423527, 0.8150058528408408]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.695906188338995
accuracy: 76.14678192138672
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [73.39449310302734, 77.06421661376953, 76.14678192138672], 'time': [0.6899666525423527, 0.8150058528408408, 0.695906188338995]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.713997982442379
accuracy: 75.22935485839844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [73.39449310302734, 77.06421661376953, 76.14678192138672, 75.22935485839844], 'time': [0.6899666525423527, 0.8150058528408408, 0.695906188338995, 0.713997982442379]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6549661410972476
accuracy: 77.52293395996094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [73.39449310302734, 77.06421661376953, 76.14678192138672, 75.22935485839844, 77.52293395996094], 'time': [0.6899666525423527, 0.8150058528408408, 0.695906188338995, 0.713997982442379, 0.6549661410972476]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7609838545322418
accuracy: 56.88072967529297
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [56.88072967529297], 'time': [0.7609838545322418]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7417885176837444
accuracy: 52.293575286865234
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [56.88072967529297, 52.293575286865234], 'time': [0.7609838545322418, 0.7417885176837444]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6967047909274697
accuracy: 54.587154388427734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [56.88072967529297, 52.293575286865234, 54.587154388427734], 'time': [0.7609838545322418, 0.7417885176837444, 0.6967047909274697]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7675549248233438
accuracy: 46.33027267456055
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [56.88072967529297, 52.293575286865234, 54.587154388427734, 46.33027267456055], 'time': [0.7609838545322418, 0.7417885176837444, 0.6967047909274697, 0.7675549248233438]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6995743894949555
accuracy: 44.49541091918945
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [56.88072967529297, 52.293575286865234, 54.587154388427734, 46.33027267456055, 44.49541091918945], 'time': [0.7609838545322418, 0.7417885176837444, 0.6967047909274697, 0.7675549248233438, 0.6995743894949555]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.759836457669735
accuracy: 58.71559524536133
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [58.71559524536133], 'time': [0.759836457669735]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6933288481086493
accuracy: 55.045867919921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [58.71559524536133, 55.045867919921875], 'time': [0.759836457669735, 0.6933288481086493]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7263298416510224
accuracy: 56.88072967529297
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [58.71559524536133, 55.045867919921875, 56.88072967529297], 'time': [0.759836457669735, 0.6933288481086493, 0.7263298416510224]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7247612336650491
accuracy: 50.45871353149414
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [58.71559524536133, 55.045867919921875, 56.88072967529297, 50.45871353149414], 'time': [0.759836457669735, 0.6933288481086493, 0.7263298416510224, 0.7247612336650491]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8268410684540868
accuracy: 49.541282653808594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [58.71559524536133, 55.045867919921875, 56.88072967529297, 50.45871353149414, 49.541282653808594], 'time': [0.759836457669735, 0.6933288481086493, 0.7263298416510224, 0.7247612336650491, 0.8268410684540868]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6971759935840964
accuracy: 64.22018432617188
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [64.22018432617188], 'time': [0.6971759935840964]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6998198768123984
accuracy: 60.091739654541016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [64.22018432617188, 60.091739654541016], 'time': [0.6971759935840964, 0.6998198768123984]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7487272741273046
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [64.22018432617188, 60.091739654541016, 61.46788787841797], 'time': [0.6971759935840964, 0.6998198768123984, 0.7487272741273046]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7142944624647498
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [64.22018432617188, 60.091739654541016, 61.46788787841797, 61.46788787841797], 'time': [0.6971759935840964, 0.6998198768123984, 0.7487272741273046, 0.7142944624647498]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7610450861975551
accuracy: 62.385318756103516
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [64.22018432617188, 60.091739654541016, 61.46788787841797, 61.46788787841797, 62.385318756103516], 'time': [0.6971759935840964, 0.6998198768123984, 0.7487272741273046, 0.7142944624647498, 0.7610450861975551]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7375222984701395
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797], 'time': [0.7375222984701395]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8302512150257826
accuracy: 38.07339096069336
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 38.07339096069336], 'time': [0.7375222984701395, 0.8302512150257826]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7906291671097279
accuracy: 35.32109832763672
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 38.07339096069336, 35.32109832763672], 'time': [0.7375222984701395, 0.8302512150257826, 0.7906291671097279]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7794102663174272
accuracy: 38.532108306884766
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 38.07339096069336, 35.32109832763672, 38.532108306884766], 'time': [0.7375222984701395, 0.8302512150257826, 0.7906291671097279, 0.7794102663174272]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7799209300428629
accuracy: 63.76146697998047
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 38.07339096069336, 35.32109832763672, 38.532108306884766, 63.76146697998047], 'time': [0.7375222984701395, 0.8302512150257826, 0.7906291671097279, 0.7794102663174272, 0.7799209300428629]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7131613763049245
accuracy: 51.834861755371094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [51.834861755371094], 'time': [0.7131613763049245]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7419713139533997
accuracy: 44.49541091918945
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [51.834861755371094, 44.49541091918945], 'time': [0.7131613763049245, 0.7419713139533997]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8713372815400362
accuracy: 46.78899002075195
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [51.834861755371094, 44.49541091918945, 46.78899002075195], 'time': [0.7131613763049245, 0.7419713139533997, 0.8713372815400362]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7841059491038322
accuracy: 43.1192626953125
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [51.834861755371094, 44.49541091918945, 46.78899002075195, 43.1192626953125], 'time': [0.7131613763049245, 0.7419713139533997, 0.8713372815400362, 0.7841059491038322]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.692131775431335
accuracy: 63.76146697998047
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [51.834861755371094, 44.49541091918945, 46.78899002075195, 43.1192626953125, 63.76146697998047], 'time': [0.7131613763049245, 0.7419713139533997, 0.8713372815400362, 0.7841059491038322, 0.692131775431335]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6807467499747872
accuracy: 25.22935676574707
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [25.22935676574707], 'time': [0.6807467499747872]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.708630483597517
accuracy: 58.25687789916992
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [25.22935676574707, 58.25687789916992], 'time': [0.6807467499747872, 0.708630483597517]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8544198600575328
accuracy: 57.79816436767578
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [25.22935676574707, 58.25687789916992, 57.79816436767578], 'time': [0.6807467499747872, 0.708630483597517, 0.8544198600575328]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7308837631717324
accuracy: 57.79816436767578
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [25.22935676574707, 58.25687789916992, 57.79816436767578, 57.79816436767578], 'time': [0.6807467499747872, 0.708630483597517, 0.8544198600575328, 0.7308837631717324]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7280571674928069
accuracy: 62.844032287597656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.5, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [25.22935676574707, 58.25687789916992, 57.79816436767578, 57.79816436767578, 62.844032287597656], 'time': [0.6807467499747872, 0.708630483597517, 0.8544198600575328, 0.7308837631717324, 0.7280571674928069]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6834773374721408
accuracy: 18.80733871459961
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [18.80733871459961], 'time': [0.6834773374721408]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7096646279096603
accuracy: 57.79816436767578
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [18.80733871459961, 57.79816436767578], 'time': [0.6834773374721408, 0.7096646279096603]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6978373723104596
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [18.80733871459961, 57.79816436767578, 60.55045700073242], 'time': [0.6834773374721408, 0.7096646279096603, 0.6978373723104596]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7954800082370639
accuracy: 56.42201614379883
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [18.80733871459961, 57.79816436767578, 60.55045700073242, 56.42201614379883], 'time': [0.6834773374721408, 0.7096646279096603, 0.6978373723104596, 0.7954800082370639]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8657328365370631
accuracy: 56.42201614379883
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [18.80733871459961, 57.79816436767578, 60.55045700073242, 56.42201614379883, 56.42201614379883], 'time': [0.6834773374721408, 0.7096646279096603, 0.6978373723104596, 0.7954800082370639, 0.8657328365370631]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7479535387828946
accuracy: 59.633026123046875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [59.633026123046875], 'time': [0.7479535387828946]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.750487606972456
accuracy: 64.67889404296875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [59.633026123046875, 64.67889404296875], 'time': [0.7479535387828946, 0.750487606972456]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7671553334221244
accuracy: 65.59632873535156
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [59.633026123046875, 64.67889404296875, 65.59632873535156], 'time': [0.7479535387828946, 0.750487606972456, 0.7671553334221244]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8041273895651102
accuracy: 65.13761138916016
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [59.633026123046875, 64.67889404296875, 65.59632873535156, 65.13761138916016], 'time': [0.7479535387828946, 0.750487606972456, 0.7671553334221244, 0.8041273895651102]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7962644258514047
accuracy: 72.01834869384766
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [59.633026123046875, 64.67889404296875, 65.59632873535156, 65.13761138916016, 72.01834869384766], 'time': [0.7479535387828946, 0.750487606972456, 0.7671553334221244, 0.8041273895651102, 0.7962644258514047]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7561849299818277
accuracy: 71.10091400146484
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [71.10091400146484], 'time': [0.7561849299818277]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7378010461106896
accuracy: 73.85321044921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [71.10091400146484, 73.85321044921875], 'time': [0.7561849299818277, 0.7378010461106896]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7149936277419329
accuracy: 74.77063751220703
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [71.10091400146484, 73.85321044921875, 74.77063751220703], 'time': [0.7561849299818277, 0.7378010461106896, 0.7149936277419329]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7217974057421088
accuracy: 77.52293395996094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [71.10091400146484, 73.85321044921875, 74.77063751220703, 77.52293395996094], 'time': [0.7561849299818277, 0.7378010461106896, 0.7149936277419329, 0.7217974057421088]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6917505068704486
accuracy: 77.98165130615234
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [71.10091400146484, 73.85321044921875, 74.77063751220703, 77.52293395996094, 77.98165130615234], 'time': [0.7561849299818277, 0.7378010461106896, 0.7149936277419329, 0.7217974057421088, 0.6917505068704486]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6605508700013161
accuracy: 12.385320663452148
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [12.385320663452148], 'time': [0.6605508700013161]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7331121079623699
accuracy: 59.17430877685547
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [12.385320663452148, 59.17430877685547], 'time': [0.6605508700013161, 0.7331121079623699]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8435182608664036
accuracy: 52.75229263305664
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [12.385320663452148, 59.17430877685547, 52.75229263305664], 'time': [0.6605508700013161, 0.7331121079623699, 0.8435182608664036]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7103065680712461
accuracy: 51.37614440917969
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [12.385320663452148, 59.17430877685547, 52.75229263305664, 51.37614440917969], 'time': [0.6605508700013161, 0.7331121079623699, 0.8435182608664036, 0.7103065680712461]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6721517350524664
accuracy: 53.66972351074219
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [12.385320663452148, 59.17430877685547, 52.75229263305664, 51.37614440917969, 53.66972351074219], 'time': [0.6605508700013161, 0.7331121079623699, 0.8435182608664036, 0.7103065680712461, 0.6721517350524664]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7136356690898538
accuracy: 24.770641326904297
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [24.770641326904297], 'time': [0.7136356690898538]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6911875959485769
accuracy: 67.88990783691406
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [24.770641326904297, 67.88990783691406], 'time': [0.7136356690898538, 0.6911875959485769]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7286559753119946
accuracy: 67.43119049072266
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [24.770641326904297, 67.88990783691406, 67.43119049072266], 'time': [0.7136356690898538, 0.6911875959485769, 0.7286559753119946]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7647938672453165
accuracy: 62.844032287597656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [24.770641326904297, 67.88990783691406, 67.43119049072266, 62.844032287597656], 'time': [0.7136356690898538, 0.6911875959485769, 0.7286559753119946, 0.7647938672453165]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7201898526400328
accuracy: 49.08256530761719
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [24.770641326904297, 67.88990783691406, 67.43119049072266, 62.844032287597656, 49.08256530761719], 'time': [0.7136356690898538, 0.6911875959485769, 0.7286559753119946, 0.7647938672453165, 0.7201898526400328]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6478355098515749
accuracy: 3.2110090255737305
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [3.2110090255737305], 'time': [0.6478355098515749]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7276058457791805
accuracy: 77.98165130615234
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [3.2110090255737305, 77.98165130615234], 'time': [0.6478355098515749, 0.7276058457791805]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7021713284775615
accuracy: 76.60549926757812
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [3.2110090255737305, 77.98165130615234, 76.60549926757812], 'time': [0.6478355098515749, 0.7276058457791805, 0.7021713284775615]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7741093467921019
accuracy: 70.64219665527344
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [3.2110090255737305, 77.98165130615234, 76.60549926757812, 70.64219665527344], 'time': [0.6478355098515749, 0.7276058457791805, 0.7021713284775615, 0.7741093467921019]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7553377505391836
accuracy: 50.45871353149414
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 0.1, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [3.2110090255737305, 77.98165130615234, 76.60549926757812, 70.64219665527344, 50.45871353149414], 'time': [0.6478355098515749, 0.7276058457791805, 0.7021713284775615, 0.7741093467921019, 0.7553377505391836]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.8086278280243278
accuracy: 61.46788787841797
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797], 'time': [0.8086278280243278]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7384754344820976
accuracy: 51.37614440917969
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 51.37614440917969], 'time': [0.8086278280243278, 0.7384754344820976]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.772357183508575
accuracy: 62.844032287597656
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 51.37614440917969, 62.844032287597656], 'time': [0.8086278280243278, 0.7384754344820976, 0.772357183508575]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7271115258336067
accuracy: 47.247703552246094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 51.37614440917969, 62.844032287597656, 47.247703552246094], 'time': [0.8086278280243278, 0.7384754344820976, 0.772357183508575, 0.7271115258336067]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7943942230194807
accuracy: 44.49541091918945
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [61.46788787841797, 51.37614440917969, 62.844032287597656, 47.247703552246094, 44.49541091918945], 'time': [0.8086278280243278, 0.7384754344820976, 0.772357183508575, 0.7271115258336067, 0.7943942230194807]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7258976576849818
accuracy: 56.88072967529297
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [56.88072967529297], 'time': [0.7258976576849818]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7537799756973982
accuracy: 51.834861755371094
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [56.88072967529297, 51.834861755371094], 'time': [0.7258976576849818, 0.7537799756973982]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6760690407827497
accuracy: 60.55045700073242
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [56.88072967529297, 51.834861755371094, 60.55045700073242], 'time': [0.7258976576849818, 0.7537799756973982, 0.6760690407827497]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7331203278154135
accuracy: 51.37614440917969
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [56.88072967529297, 51.834861755371094, 60.55045700073242, 51.37614440917969], 'time': [0.7258976576849818, 0.7537799756973982, 0.6760690407827497, 0.7331203278154135]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.73955147434026
accuracy: 49.541282653808594
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [56.88072967529297, 51.834861755371094, 60.55045700073242, 51.37614440917969, 49.541282653808594], 'time': [0.7258976576849818, 0.7537799756973982, 0.6760690407827497, 0.7331203278154135, 0.73955147434026]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6732055749744177
accuracy: 56.88072967529297
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [56.88072967529297], 'time': [0.6732055749744177]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6888116234913468
accuracy: 55.50458526611328
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [56.88072967529297, 55.50458526611328], 'time': [0.6732055749744177, 0.6888116234913468]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.9042959986254573
accuracy: 58.71559524536133
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [56.88072967529297, 55.50458526611328, 58.71559524536133], 'time': [0.6732055749744177, 0.6888116234913468, 0.9042959986254573]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7401770064607263
accuracy: 55.50458526611328
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [56.88072967529297, 55.50458526611328, 58.71559524536133, 55.50458526611328], 'time': [0.6732055749744177, 0.6888116234913468, 0.9042959986254573, 0.7401770064607263]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7603365499526262
accuracy: 56.42201614379883
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.0, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [56.88072967529297, 55.50458526611328, 58.71559524536133, 55.50458526611328, 56.42201614379883], 'time': [0.6732055749744177, 0.6888116234913468, 0.9042959986254573, 0.7401770064607263, 0.7603365499526262]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6340726800262928
accuracy: 25.688072204589844
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [25.688072204589844], 'time': [0.6340726800262928]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7079671192914248
accuracy: 31.192659378051758
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [25.688072204589844, 31.192659378051758], 'time': [0.6340726800262928, 0.7079671192914248]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7491265032440424
accuracy: 30.27522850036621
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [25.688072204589844, 31.192659378051758, 30.27522850036621], 'time': [0.6340726800262928, 0.7079671192914248, 0.7491265032440424]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7192012295126915
accuracy: 30.27522850036621
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [25.688072204589844, 31.192659378051758, 30.27522850036621, 30.27522850036621], 'time': [0.6340726800262928, 0.7079671192914248, 0.7491265032440424, 0.7192012295126915]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6676253592595458
accuracy: 57.339447021484375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 0.1}, 'accuracy': [25.688072204589844, 31.192659378051758, 30.27522850036621, 30.27522850036621, 57.339447021484375], 'time': [0.6340726800262928, 0.7079671192914248, 0.7491265032440424, 0.7192012295126915, 0.6676253592595458]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6469981679692864
accuracy: 30.27522850036621
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [30.27522850036621], 'time': [0.6469981679692864]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7846032604575157
accuracy: 44.95412826538086
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [30.27522850036621, 44.95412826538086], 'time': [0.6469981679692864, 0.7846032604575157]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7866819873452187
accuracy: 39.44953918457031
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [30.27522850036621, 44.95412826538086, 39.44953918457031], 'time': [0.6469981679692864, 0.7846032604575157, 0.7866819873452187]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7332699382677674
accuracy: 22.018346786499023
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [30.27522850036621, 44.95412826538086, 39.44953918457031, 22.018346786499023], 'time': [0.6469981679692864, 0.7846032604575157, 0.7866819873452187, 0.7332699382677674]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7323137260973454
accuracy: 42.20183181762695
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 1.0}, 'accuracy': [30.27522850036621, 44.95412826538086, 39.44953918457031, 22.018346786499023, 42.20183181762695], 'time': [0.6469981679692864, 0.7846032604575157, 0.7866819873452187, 0.7332699382677674, 0.7323137260973454]}


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7177361743524671
accuracy: 36.69724655151367
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [36.69724655151367], 'time': [0.7177361743524671]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7457960853353143
accuracy: 57.339447021484375
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [36.69724655151367, 57.339447021484375], 'time': [0.7177361743524671, 0.7457960853353143]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.7898914348334074
accuracy: 55.045867919921875
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [36.69724655151367, 57.339447021484375, 55.045867919921875], 'time': [0.7177361743524671, 0.7457960853353143, 0.7898914348334074]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.747077438980341
accuracy: 49.999996185302734
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [36.69724655151367, 57.339447021484375, 55.045867919921875, 49.999996185302734], 'time': [0.7177361743524671, 0.7457960853353143, 0.7898914348334074, 0.747077438980341]}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model downloaded: bert-base-uncased
training time: 0.6860917471349239
accuracy: 53.66972351074219
{'params': {'reservoir_dim': 1000, 'spectral_radius': 1.5, 'leaking_rate': 0.9, 'input_scaling': 1.0, 'bias_scaling': 0.0, 'sparsity': 0.99, 'activation_function': 'relu', 'alpha': 10.0}, 'accuracy': [36.69724655151367, 57.339447021484375, 55.045867919921875, 49.999996185302734, 53.66972351074219], 'time': [0.7177361743524671, 0.7457960853353143, 0.7898914348334074, 0.747077438980341, 0.6860917471349239]}


In [24]:
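# Uncomment to display the collected grid-search results (left disabled to avoid flooding the output).
# esn_results_l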

In [22]:
# Annotate every result record with summary statistics of its accuracy list:
# the mean and the standard deviation (np.std default, i.e. ddof=0).
for result in esn_results_l:
    accuracies = result['accuracy']
    result.update(
        accuracy_mean=np.mean(accuracies),
        accuracy_std=np.std(accuracies),
    )

In [27]:
# Persist the annotated results in `esn_results_l` to RESULTS_PATH/RESULTS_FILE.
# The results folder is created first so that a missing directory cannot make
# the save fail after the grid search has already been run.
os.makedirs(RESULTS_PATH, exist_ok=True)
with open(os.path.join(RESULTS_PATH, RESULTS_FILE), 'wb') as fh:
    pickle.dump(esn_results_l, fh)

In [6]:
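# Uncomment to reload previously saved results instead of re-running the grid search.
# NOTE: pickle.load can execute arbitrary code; only load result files you produced yourself.
# with open(os.path.join(RESULTS_PATH, RESULTS_FILE), 'rb') as fh:
#     esn_results_l = pickle.load(fh)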