In [None]:
!pip install wandb
!pip install simpletransformers

Collecting wandb
[?25l  Downloading https://files.pythonhosted.org/packages/47/af/4cfe48fe55046181b992251933cff4ceb3bfd71a42838f5fe683683cd925/wandb-0.10.25-py2.py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 5.9MB/s 
[?25hCollecting subprocess32>=3.5.3
[?25l  Downloading https://files.pythonhosted.org/packages/32/c8/564be4d12629b912ea431f1a50eb8b3b9d00f1a0b1ceff17f266be190007/subprocess32-3.5.4.tar.gz (97kB)
[K     |████████████████████████████████| 102kB 10.4MB/s 
Collecting sentry-sdk>=0.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/f3/92/5a33be64990ba815364a8f2dd9e6f51de60d23dfddafb4f1fc5577d4dc64/sentry_sdk-1.0.0-py2.py3-none-any.whl (131kB)
[K     |████████████████████████████████| 133kB 17.3MB/s 
[?25hCollecting shortuuid>=0.5.0
  Downloading https://files.pythonhosted.org/packages/25/a6/2ecc1daa6a304e7f1b216f0896b26156b78e7c38e1211e9b798b4716c53d/shortuuid-1.0.1-py3-none-any.whl
Collecting GitPython>=1.0.0
[?25l  Downloading ht

In [None]:
import logging

import pandas as pd
import sklearn
import wandb

import nltk
# Fetch the NLTK data packages up front so the corpus reader and lemmatizer
# imported below can find their data on a fresh machine.
nltk.download('wordnet')
# NOTE(review): 'punkt' is not referenced by name in the visible cells - confirm it is still needed.
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from simpletransformers.classification import ClassificationArgs, ClassificationModel


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Weights & Biases sweep definition: random search over the seed and the
# number of training epochs.
sweep_config = {
    "method": "random",  # bayes, grid, random
    # The metric name must match a key the runs actually log. The run
    # summaries of this notebook show "Training loss" (plus lr / global_step);
    # no "train_loss" key is logged while evaluate_during_training is off, so
    # the previous name left the sweep objective and Hyperband without data.
    "metric": {"name": "Training loss", "goal": "minimize"},
    "parameters": {
        "manual_seed": {"values": [20, 40, 60, 80, 100]},
        "num_train_epochs": {"values": [2, 5, 8, 10]},
    },
    "early_terminate": {"type": "hyperband", "min_iter": 6},
}

# --- Alternative sweep kept for reference (disabled) -------------------------
# Bayesian search over the epoch count plus per-parameter-group values
# (presumably learning rates - confirm before re-enabling).
#
# layer_parameters = {f"layer_{i}-{i + 6}": {"min": 0.0, "max": 5e-5} for i in range(0, 24, 6)}
#
# sweep_config = {
#     "method": "bayes",
#     "metric": {"name": "train_loss", "goal": "minimize"},
#     "parameters": {
#         "num_train_epochs": {"min": 5, "max": 15},
#         "params_classifier.dense.weight": {"min": 0, "max": 1e-3},
#         "params_classifier.dense.bias": {"min": 0, "max": 1e-3},
#         "params_classifier.out_proj.weight": {"min": 0, "max": 1e-3},
#         "params_classifier.out_proj.bias": {"min": 0, "max": 1e-3},
#         **layer_parameters,
#     },
#     "early_terminate": {"type": "hyperband", "min_iter": 6},
# }
#
# Another variant that also samples the learning rate:
# "parameters": {"num_train_epochs": {"values": [2,5,8,10]}, "learning_rate": {"min": 4e-6, "max": 4e-5}},

# Register the sweep with W&B; the returned id is what the agent subscribes to.
sweep_id = wandb.sweep(sweep_config, project="Simple Sweep_3")

Create sweep with ID: yl6mc9ax
Sweep URL: https://wandb.ai/ddave94/Simple%20Sweep_3/sweeps/yl6mc9ax


In [None]:
# Keep the "transformers" logger at WARNING so only warnings and errors from
# that namespace get through ...
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# ... while everything else is reported from INFO upwards.
logging.basicConfig(level=logging.INFO)

### Importing Data

In [None]:
# Read the training and test CSVs straight from the project's GitHub repository.
kiva_train = pd.read_csv('https://raw.githubusercontent.com/DDave94/Loan-Default-Prediction/main/kiva_train.csv')
kiva_test = pd.read_csv('https://raw.githubusercontent.com/DDave94/Loan-Default-Prediction/main/kiva_test.csv')

# The predict function is fed a plain Python list, so pull the text column of
# the Kaggle test set out as one.
kaggle_test = kiva_test['en_clean'].to_list()

# Preview the first rows of the training data.
kiva_train.head(10)

Unnamed: 0,loan_id,en_clean,defaulted
0,7779,She opened a colmado out of the side of her ho...,0
1,2777,(First Loan): Joffre continues to run his loc...,1
2,6007,"Dina Santana is the mother of two children, Ju...",0
3,76,"Rosemary is 50 years old, single, and has 6 ch...",1
4,4217,"Segundo has a shop where he sells animal feed,...",0
5,5077,"I am a single parent, mother of 4. I sell groc...",0
6,6033,Mariana Jose Serda owns a general store in Mag...,0
7,843,Mary is a mother of four children. One of the ...,1
8,4357,Wilson is seeking his second loan with Kiva an...,0
9,7007,Clara lives with her two children (ages: 20 an...,0


### Preprocessing

In [None]:
pip install unidecode textstat



In [None]:
import unicodedata
import unidecode
# One lemmatizer instance, shared by every preprocess() call.
lemmer = WordNetLemmatizer()

# English stop words held in a set. In the visible cells this is only
# referenced by commented-out code.
stop_words = {*stopwords.words('english')}

def preprocess(x):
    """Normalise one loan description before it is handed to the model.

    Steps, in order: lower-case the text, transliterate non-ASCII characters
    to ASCII with ``unidecode``, split on whitespace, lemmatize every token
    with the module-level ``lemmer`` and join the tokens back together with
    single spaces.

    Args:
        x: Raw text. ``None`` and float NaN (a missing cell) are treated as
            empty text; any other non-string value is converted with ``str()``.

    Returns:
        The cleaned text as a single string ('' for missing input).
    """
    # A missing cell arrives as None/NaN; without this guard x.lower() raises
    # AttributeError and aborts the whole .apply(). NaN is the only float for
    # which `x != x` holds.
    if x is None or (isinstance(x, float) and x != x):
        return ''
    if not isinstance(x, str):
        x = str(x)

    # Lower case
    x = x.lower()

    # Disabled steps kept for reference (re-enabling them needs `import re`):
    # x = re.sub(r'[^\w\s]', '', x)   # remove punctuation
    # x = re.sub(r'\d+', '', x)       # remove numbers

    # Transliterate non-ASCII characters to ASCII. Nothing is removed here.
    x = unidecode.unidecode(x)

    # Lemmatize token by token. Stop-word removal is currently disabled:
    # x = [lemmer.lemmatize(w) for w in x.split() if w not in stop_words]
    tokens = [lemmer.lemmatize(w) for w in x.split()]
    return ' '.join(tokens)

# Clean the training text in place.
kiva_train['en_clean'] = kiva_train['en_clean'].apply(preprocess)

# The Kaggle test list was extracted before any cleaning. Rebuild it from the
# raw test frame with the same preprocessing so inference sees text in the
# same form as training. kiva_test itself is left untouched, which keeps this
# line safe to re-run.
kaggle_test = kiva_test['en_clean'].apply(preprocess).to_list()

### Creating Train/Validation Data

In [None]:
# Model input (cleaned loan text) and target (default flag).
X = kiva_train['en_clean']
y = kiva_train['defaulted']

# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Name the columns "text" and "labels". With the original headers the library
# warned "Dataframe headers not specified" and fell back to column positions.
# Column order (text first, labels second) is the same as before.
train_df = pd.DataFrame({'text': X_train, 'labels': y_train})
eval_df = pd.DataFrame({'text': X_val, 'labels': y_val})

In [None]:
# Settings shared by every run of the sweep. num_train_epochs is intentionally
# absent (a fixed value of 8 was tried earlier): it is one of the swept
# parameters and reaches the model through the sweep configuration.
_fixed_settings = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "evaluate_during_training": False,
    "use_multiprocessing": True,
    "no_cache": True,
    "save_model_every_epoch": False,
    "wandb_project": "Simple Sweep_3",
    "train_batch_size": 32,
    "eval_batch_size": 32,
    "max_seq_length": 300,
    "learning_rate": 4e-5,
}

model_args = ClassificationArgs()
for _arg_name, _arg_value in _fixed_settings.items():
    setattr(model_args, _arg_name, _arg_value)

# Author's note (presumably the best configuration found so far - confirm):
# manual_seed 20, 10 epochs.


In [None]:
def train():
    """Run one sweep trial: train XLNet, evaluate it, and report on the validation split.

    Called by the W&B agent once per sampled configuration. The swept values
    reach the model through ``sweep_config=wandb.config``; everything else
    comes from the module-level ``model_args``.

    Reads the module-level ``train_df``, ``eval_df``, ``X_val`` and ``y_val``.
    Prints a classification report for the validation split and returns
    nothing. Exceptions from training propagate to the caller unchanged, after
    GPU memory has been released.
    """
    # Local imports keep this cell self-contained; torch is already a
    # dependency of simpletransformers.
    import gc

    import torch

    # Initialize a new wandb run
    wandb.init()

    model = None
    try:
        # Create a TransformerModel
        model = ClassificationModel(
            "xlnet",
            "xlnet-base-cased",
            use_cuda=True,
            args=model_args,
            sweep_config=wandb.config,
        )

        # Train the model
        model.train_model(train_df)

        # Evaluate the model
        model.eval_model(eval_df)

        preds, outputs = model.predict(X_val.to_list())
        print(classification_report(y_val, preds))
    finally:
        # Every trial runs in the same process. Without an explicit release,
        # the previous trial's weights and optimizer state stay on the GPU and
        # later trials fail with "CUDA out of memory", as seen in earlier runs
        # of this notebook. Drop the reference, collect cycles, then return
        # the cached blocks to the driver.
        del model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Sync wandb (only reached when the trial succeeded; failures are left to
    # the agent to report, as before).
    wandb.join()


# Start an in-process agent that calls train() for each configuration handed
# out by the sweep. No run count is passed, so the agent keeps asking for work.
wandb.agent(sweep_id, function=train)

INFO:wandb.agents.pyagent:Starting sweep agent: entity=None, project=None, count=None
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
ERROR:wandb.sdk.internal.internal_api:500 response executing GraphQL.
ERROR:wandb.sdk.internal.internal_api:{"errors":[{"message":"Post \"http://anaconda.default.svc.cluster.local/search\": context deadline exceeded","path":["agentHeartbeat"]}],"data":{"agentHeartbeat":null}}
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: Post "http://anaconda.default.svc.cluster.local/search": context deadline exceeded (<Response [500]>)
ERROR:wandb.sdk.internal.internal_api:500 response executing GraphQL.
ERROR:wandb.sdk.internal.internal_api:{"errors":[{"message":"context deadline exceeded","path":["agentHeartbeat"]}],"data":{"agentHeartbeat":null}}
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: z95belum w

INFO:filelock:Lock 140441879463312 acquired on /root/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346.lock


Downloading:   0%|          | 0.00/760 [00:00<?, ?B/s]

INFO:filelock:Lock 140441879463312 released on /root/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346.lock
INFO:filelock:Lock 140444350970512 acquired on /root/.cache/huggingface/transformers/9461853998373b0b2f8ef8011a13b62a2c5f540b2c535ef3ea46ed8a062b16a9.3e214f11a50e9e03eb47535b58522fc3cc11ac67c120a9450f6276de151af987.lock


Downloading:   0%|          | 0.00/467M [00:00<?, ?B/s]

INFO:filelock:Lock 140444350970512 released on /root/.cache/huggingface/transformers/9461853998373b0b2f8ef8011a13b62a2c5f540b2c535ef3ea46ed8a062b16a9.3e214f11a50e9e03eb47535b58522fc3cc11ac67c120a9450f6276de151af987.lock
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly i

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

INFO:filelock:Lock 140441728518608 released on /root/.cache/huggingface/transformers/df73bc9f8d13bf2ea4dab95624895e45a550a0f0a825e41fc25440bf367ee3c8.d93497120e3a865e2970f26abdf7bf375896f97fde8b874b70909592a6c785c9.lock
INFO:filelock:Lock 140441728518736 acquired on /root/.cache/huggingface/transformers/46f47734f3dcaef7e236b9a3e887f27814e18836a8db7e6a49148000058a1a54.2a683f915238b4f560dab0c724066cf0a7de9a851e96b0fb3a1e7f0881552f53.lock


Downloading:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

INFO:filelock:Lock 140441728518736 released on /root/.cache/huggingface/transformers/46f47734f3dcaef7e236b9a3e887f27814e18836a8db7e6a49148000058a1a54.2a683f915238b4f560dab0c724066cf0a7de9a851e96b0fb3a1e7f0881552f53.lock
  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/4910 [00:00<?, ?it/s]

Epoch:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 0 of 8:   0%|          | 0/154 [00:00<?, ?it/s]



Running Epoch 1 of 8:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 2 of 8:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 3 of 8:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 4 of 8:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 5 of 8:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 6 of 8:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 7 of 8:   0%|          | 0/154 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of xlnet model complete. Saved to outputs/.
  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1228 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/39 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.7653550057092278, 'tp': 528, 'tn': 556, 'fp': 73, 'fn': 71, 'auroc': 0.9434444264553269, 'auprc': 0.9408631019338536, 'eval_loss': 0.774293683278255}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1228 [00:00<?, ?it/s]

Exception ignored in: <finalize object at 0x7fbacf552be0; dead>
Traceback (most recent call last):
  File "/usr/lib/python3.7/weakref.py", line 572, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/usr/lib/python3.7/tempfile.py", line 936, in _cleanup
    _rmtree(name)
  File "/usr/lib/python3.7/shutil.py", line 485, in rmtree
    onerror(os.lstat, path, sys.exc_info())
  File "/usr/lib/python3.7/shutil.py", line 483, in rmtree
    orig_st = os.lstat(path)
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmpu1b01fuy'
    return info.func(*info.args, **(info.kwargs or {}))
Exception ignored in: <finalize object at 0x7fbacf552c50; dead>
Traceback (most recent call last):
  File "/usr/lib/python3.7/weakref.py", line 572, in __call__
  File "/usr/lib/python3.7/tempfile.py", line 936, in _cleanup
    _rmtree(name)
  File "/usr/lib/python3.7/shutil.py", line 485, in rmtree
    onerror(os.lstat, path, sys.exc_info())
  File "/usr/lib/python3.7/shutil.p

  0%|          | 0/39 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.89      0.88      0.89       629
           1       0.88      0.88      0.88       599

    accuracy                           0.88      1228
   macro avg       0.88      0.88      0.88      1228
weighted avg       0.88      0.88      0.88      1228



Exception ignored in: <finalize object at 0x7fbacf552be0; dead>
Traceback (most recent call last):
  File "/usr/lib/python3.7/weakref.py", line 572, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/usr/lib/python3.7/tempfile.py", line 936, in _cleanup
    _rmtree(name)
  File "/usr/lib/python3.7/shutil.py", line 485, in rmtree
    onerror(os.lstat, path, sys.exc_info())
  File "/usr/lib/python3.7/shutil.py", line 483, in rmtree
    orig_st = os.lstat(path)
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmpu1b01fuy'
Exception ignored in: <finalize object at 0x7fbacf552c50; dead>
Traceback (most recent call last):
  File "/usr/lib/python3.7/weakref.py", line 572, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/usr/lib/python3.7/tempfile.py", line 936, in _cleanup
    _rmtree(name)
  File "/usr/lib/python3.7/shutil.py", line 485, in rmtree
    onerror(os.lstat, path, sys.exc_info())
  File "/usr/lib/python3.7/shutil.p

VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.04736
lr,0.0
global_step,1200.0
_runtime,2118.0
_timestamp,1618125518.0
_step,26.0


0,1
Training loss,█▇▅▄▃▄▂▃▃▁▂▂▁▁▂▁▁▄▂▁▁▁▁▁
lr,▆██▇▇▇▆▆▆▅▅▅▅▄▄▄▃▃▃▂▂▂▁▁
global_step,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇██
_runtime,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████
_timestamp,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████
_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██


[34m[1mwandb[0m: Agent Starting Run: 87ydk7oz with config:
[34m[1mwandb[0m: 	manual_seed: 20
[34m[1mwandb[0m: 	num_train_epochs: 2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

  0%|          | 0/4910 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/154 [00:00<?, ?it/s]



Running Epoch 1 of 2:   0%|          | 0/154 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of xlnet model complete. Saved to outputs/.
  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1228 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/39 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.7148215672593168, 'tp': 537, 'tn': 514, 'fp': 115, 'fn': 62, 'auroc': 0.9379954932837188, 'auprc': 0.9384614889138139, 'eval_loss': 0.33018677968245286}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1228 [00:00<?, ?it/s]

Exception ignored in: <finalize object at 0x7fbbd832d2c0; dead>
Traceback (most recent call last):
  File "/usr/lib/python3.7/weakref.py", line 572, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/usr/lib/python3.7/tempfile.py", line 936, in _cleanup
    _rmtree(name)
  File "/usr/lib/python3.7/shutil.py", line 485, in rmtree
    onerror(os.lstat, path, sys.exc_info())
  File "/usr/lib/python3.7/shutil.py", line 483, in rmtree
    orig_st = os.lstat(path)
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmpqj_fp9sp'
Exception ignored in: <finalize object at 0x7fbb204790e0; dead>
Traceback (most recent call last):
  File "/usr/lib/python3.7/weakref.py", line 572, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/usr/lib/python3.7/tempfile.py", line 936, in _cleanup
    _rmtree(name)
  File "/usr/lib/python3.7/shutil.py", line 485, in rmtree
    onerror(os.lstat, path, sys.exc_info())
  File "/usr/lib/python3.7/shutil.p

  0%|          | 0/39 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.89      0.82      0.85       629
           1       0.82      0.90      0.86       599

    accuracy                           0.86      1228
   macro avg       0.86      0.86      0.86      1228
weighted avg       0.86      0.86      0.86      1228



VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.21043
lr,0.0
global_step,300.0
_runtime,557.0
_timestamp,1618126113.0
_step,8.0


0,1
Training loss,█▇▅▂▃▁
lr,▆█▆▄▃▁
global_step,▁▂▄▅▇█
_runtime,▁▂▄▅▆▇███
_timestamp,▁▂▄▅▆▇███
_step,▁▂▃▄▅▅▆▇█


[34m[1mwandb[0m: Agent Starting Run: xfevcm6j with config:
[34m[1mwandb[0m: 	manual_seed: 20
[34m[1mwandb[0m: 	num_train_epochs: 10
Exception ignored in: <finalize object at 0x7fbbd832d2c0; dead>
Traceback (most recent call last):
  File "/usr/lib/python3.7/weakref.py", line 572, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/usr/lib/python3.7/tempfile.py", line 936, in _cleanup
    _rmtree(name)
  File "/usr/lib/python3.7/shutil.py", line 485, in rmtree
    onerror(os.lstat, path, sys.exc_info())
  File "/usr/lib/python3.7/shutil.py", line 483, in rmtree
    orig_st = os.lstat(path)
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmpqj_fp9sp'
Exception ignored in: <finalize object at 0x7fbb204790e0; dead>
Traceback (most recent call last):
  File "/usr/lib/python3.7/weakref.py", line 572, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/usr/lib/python3.7/tempfile.py", line 936, in _cleanup
    _rmtree(nam

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

  0%|          | 0/4910 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/154 [00:00<?, ?it/s]



Running Epoch 1 of 10:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/154 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/154 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of xlnet model complete. Saved to outputs/.
  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1228 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/39 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.7687305010580735, 'tp': 532, 'tn': 554, 'fp': 75, 'fn': 67, 'auroc': 0.9466983923921959, 'auprc': 0.9471647134115857, 'eval_loss': 0.8846457955928949}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1228 [00:00<?, ?it/s]

  0%|          | 0/39 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.89      0.88      0.89       629
           1       0.88      0.89      0.88       599

    accuracy                           0.88      1228
   macro avg       0.88      0.88      0.88      1228
weighted avg       0.88      0.88      0.88      1228



VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.00012
lr,0.0
global_step,1500.0
_runtime,2618.0
_timestamp,1618128768.0
_step,32.0


0,1
Training loss,█▇▆▅▅▅▄▃▂▃▃▆▃▁▂▂▁▂▁▅▃▁▁▁▁▁▁▁▁▁
lr,▆██▇▇▇▇▆▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇████
_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇███


[34m[1mwandb[0m: Agent Starting Run: 5j6qj6jt with config:
[34m[1mwandb[0m: 	manual_seed: 40
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

  0%|          | 0/4910 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/154 [00:00<?, ?it/s]



VBox(children=(Label(value=' 0.02MB of 0.02MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 5j6qj6jt errored: RuntimeError('CUDA out of memory. Tried to allocate 114.00 MiB (GPU 0; 15.90 GiB total capacity; 14.58 GiB already allocated; 43.75 MiB free; 14.98 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 5j6qj6jt errored: RuntimeError('CUDA out of memory. Tried to allocate 114.00 MiB (GPU 0; 15.90 GiB total capacity; 14.58 GiB already allocated; 43.75 MiB free; 14.98 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: ap3l8vz5 with config:
[34m[1mwandb[0m: 	manual_seed: 60
[34m[1mwandb[0m: 	num_train_epochs: 8


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run ap3l8vz5 errored: RuntimeError('CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run ap3l8vz5 errored: RuntimeError('CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: oqbofesu with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run oqbofesu errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run oqbofesu errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rxgrf5eo with config:
[34m[1mwandb[0m: 	manual_seed: 40
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run rxgrf5eo errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run rxgrf5eo errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: bd2dtuy9 with config:
[34m[1mwandb[0m: 	manual_seed: 80
[34m[1mwandb[0m: 	num_train_epochs: 2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run bd2dtuy9 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run bd2dtuy9 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 6wk375ao with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 6wk375ao errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 6wk375ao errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
500 response executing GraphQL.
{"errors":[{"message":"Post \"http://anaconda.default.svc.cluster.local/search\": context deadline exceeded","path":["agentHeartbeat"]}],"data":{"agentHeartbeat":null}}
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: Post "http://anaconda.default.svc.cluster.local/search": context deadline exceeded (<Response [500]>)
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6smfv1co with config:
[34m[1mwandb[0m: 	manual_seed: 

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 6smfv1co errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 6smfv1co errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: g2jsay1t with config:
[34m[1mwandb[0m: 	manual_seed: 80
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run g2jsay1t errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run g2jsay1t errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 4vi0z3fe with config:
[34m[1mwandb[0m: 	manual_seed: 20
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 4vi0z3fe errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 4vi0z3fe errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: y89eft04 with config:
[34m[1mwandb[0m: 	manual_seed: 80
[34m[1mwandb[0m: 	num_train_epochs: 5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run y89eft04 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run y89eft04 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 110f4upp with config:
[34m[1mwandb[0m: 	manual_seed: 20
[34m[1mwandb[0m: 	num_train_epochs: 8


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 110f4upp errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 110f4upp errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: c5we6vz6 with config:
[34m[1mwandb[0m: 	manual_seed: 60
[34m[1mwandb[0m: 	num_train_epochs: 5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run c5we6vz6 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run c5we6vz6 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c6dxsmda with config:
[34m[1mwandb[0m: 	manual_seed: 80
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run c6dxsmda errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run c6dxsmda errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: w2ts7ozr with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run w2ts7ozr errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run w2ts7ozr errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 47rurtdi with config:
[34m[1mwandb[0m: 	manual_seed: 20
[34m[1mwandb[0m: 	num_train_epochs: 5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 47rurtdi errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 47rurtdi errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 3075t2hy with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 3075t2hy errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 3075t2hy errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: pyspy9n6 with config:
[34m[1mwandb[0m: 	manual_seed: 20
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run pyspy9n6 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run pyspy9n6 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 7jfo86zp with config:
[34m[1mwandb[0m: 	manual_seed: 80
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 7jfo86zp errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 7jfo86zp errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aineblt8 with config:
[34m[1mwandb[0m: 	manual_seed: 40
[34m[1mwandb[0m: 	num_train_epochs: 5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run aineblt8 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run aineblt8 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: sfbg3x7a with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run sfbg3x7a errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run sfbg3x7a errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 44jjlofq with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 44jjlofq errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 44jjlofq errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: acqea87i with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run acqea87i errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run acqea87i errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: o3gnuqze with config:
[34m[1mwandb[0m: 	manual_seed: 20
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run o3gnuqze errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run o3gnuqze errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: eu7fmpqy with config:
[34m[1mwandb[0m: 	manual_seed: 20
[34m[1mwandb[0m: 	num_train_epochs: 2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run eu7fmpqy errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run eu7fmpqy errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jd3x6dix with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 8


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run jd3x6dix errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run jd3x6dix errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 2k8nl8z2 with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 2k8nl8z2 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 2k8nl8z2 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: x9w5bqle with config:
[34m[1mwandb[0m: 	manual_seed: 80
[34m[1mwandb[0m: 	num_train_epochs: 5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run x9w5bqle errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run x9w5bqle errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gnf2muzq with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 8


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run gnf2muzq errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run gnf2muzq errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: lg8z3l6c with config:
[34m[1mwandb[0m: 	manual_seed: 80
[34m[1mwandb[0m: 	num_train_epochs: 8


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run lg8z3l6c errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run lg8z3l6c errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: dtjrbe8f with config:
[34m[1mwandb[0m: 	manual_seed: 40
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run dtjrbe8f errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run dtjrbe8f errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: wlkmfskh with config:
[34m[1mwandb[0m: 	manual_seed: 60
[34m[1mwandb[0m: 	num_train_epochs: 2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run wlkmfskh errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run wlkmfskh errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: nrgprnyh with config:
[34m[1mwandb[0m: 	manual_seed: 40
[34m[1mwandb[0m: 	num_train_epochs: 8


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run nrgprnyh errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run nrgprnyh errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: kbt1o93e with config:
[34m[1mwandb[0m: 	manual_seed: 20
[34m[1mwandb[0m: 	num_train_epochs: 10


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run kbt1o93e errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run kbt1o93e errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 7hzoglb6 with config:
[34m[1mwandb[0m: 	manual_seed: 60
[34m[1mwandb[0m: 	num_train_epochs: 8


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 7hzoglb6 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 7hzoglb6 errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: geer7oek with config:
[34m[1mwandb[0m: 	manual_seed: 100
[34m[1mwandb[0m: 	num_train_epochs: 2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run geer7oek errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run geer7oek errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: l7fobwbm with config:
[34m[1mwandb[0m: 	manual_seed: 40
[34m[1mwandb[0m: 	num_train_epochs: 8


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run l7fobwbm errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run l7fobwbm errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: 0p60q2vu with config:
[34m[1mwandb[0m: 	manual_seed: 80
[34m[1mwandb[0m: 	num_train_epochs: 5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 0p60q2vu errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 0p60q2vu errored: RuntimeError('CUDA out of memory. Tried to allocate 94.00 MiB (GPU 0; 15.90 GiB total capacity; 14.87 GiB already allocated; 3.75 MiB free; 15.02 GiB reserved in total by PyTorch)')
[34m[1mwandb[0m: Agent Starting Run: olh45ni3 with config:
[34m[1mwandb[0m: 	manual_seed: 40
[34m[1mwandb[0m: 	num_train_epochs: 8


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

Problem finishing run
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/agents/pyagent.py", line 303, in _run_job
    self._function()
  File "<ipython-input-33-63bfa49bbb10>", line 9, in train
    model.train_model(train_df)
  File "/usr/local/lib/python3.7/dist-packages/simpletransformers/classification/classification_model.py", line 404, in train_model
    self._move_model_to_device()
  File "/usr/local/lib/python3.7/dist-packages/simpletransformers/classification/classification_model.py", line 1631, in _move_model_to_device
    self.model.to(self.device)
  File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 673, in to
    return self._apply(convert)
  File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 387, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 387, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.7/dist