In [1]:
%pip install transformers[torch] huggingface_hub datasets evaluate torchvision kagglehub ipywidgets

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting huggingface_hub
  Downloading huggingface_hub-0.29.1-py3-none-any.whl.metadata (13 kB)
Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting kagglehub
  Downloading kagglehub-0.3.9-py3-none-any.whl.metadata (30 kB)
Collecting ipywidgets
  Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting transformers[torch]
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers[torch])
  Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting accelerate>=0.26.0 (from transformers[torch])
  Downloading accelerate-1.4.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10

In [1]:
from datasets import concatenate_datasets, load_from_disk
from transformers import BasicTokenizer
from torch.utils.data import DataLoader
from transformers import Trainer
import kagglehub
import torch
import base

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [2]:
# Download the GloVe dataset through kagglehub and print the local directory
# that the call returns.
my_glove = kagglehub.dataset_download("thanakomsn/glove6b300dtxt")
print(my_glove)

/home/jovyan/.cache/kagglehub/datasets/thanakomsn/glove6b300dtxt/versions/1


In [3]:
# Path of the 300-dimensional GloVe text file inside the downloaded directory.
GLOVE_FILE = f"{my_glove}/glove.6B.300d.txt"

In [4]:
# Splits used to train and evaluate the models on the original training set.
train_data = load_from_disk("./data/sst2/train-logits")
eval_data = load_from_disk("./data/sst2/eval-logits")
test_data = load_from_disk("./data/sst2/test-logits")

# Augmented training set and the "blank" test split that is later turned into
# the GLUE submission files.
all_train_data = load_from_disk("./data/sst2/train-logits-augmented")
test_blank_data = load_from_disk("./data/sst2/test-blank-logits")

# Union of splits used to build the vocabulary. It reuses the datasets loaded
# above, in the same order as before, instead of reading the same four
# directories a second time through duplicated path literals.
# NOTE(review): train_data is not part of the union; presumably every sentence in
# it also appears in the augmented training set — confirm.
all_data = concatenate_datasets([eval_data, test_data, all_train_data, test_blank_data])
tokenizer = BasicTokenizer(do_lower_case=True)

In [5]:
# Choose the compute device once: CUDA when present, CPU otherwise.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("GPU is not available, using CPU.")
else:
    device = torch.device("cuda")
    print("GPU is available and will be used:", torch.cuda.get_device_name(0))

GPU is available and will be used: NVIDIA A100 80GB PCIe MIG 2g.20gb


In [6]:
# Tokenize the "sentence" field of every example; each split becomes a list
# holding one token list per example.
train_data_tokens = [tokenizer.tokenize(example["sentence"]) for example in train_data]
eval_data_tokens = [tokenizer.tokenize(example["sentence"]) for example in eval_data]
test_data_tokens = [tokenizer.tokenize(example["sentence"]) for example in test_data]

all_train_data_tokens = [tokenizer.tokenize(example["sentence"]) for example in all_train_data]
test_data_blank_tokens = [tokenizer.tokenize(example["sentence"]) for example in test_blank_data]

# Tokens of the concatenated splits, used to build the vocabulary.
all_data_tokens = [tokenizer.tokenize(example["sentence"]) for example in all_data]

In [7]:
# Build the vocabulary from the tokens of the concatenated splits
# (the exact rules live in base.get_vocab).
vocab = base.get_vocab(all_data_tokens)

In [8]:
# Map every vocabulary entry to its position in `vocab`.
# NOTE(review): positions start at 0; verify that the padding value used by
# base.padd cannot collide with the index of a real word.
word_index = {word: position for position, word in enumerate(vocab)}

In [9]:
# Load the GloVe vectors from GLOVE_FILE; the recorded run reports 400000 word vectors.
embeddings_index = base.get_embeddings_indeces(GLOVE_FILE)

Found 400000 word vectors.


In [10]:
print(len(vocab))
# Two rows more than the vocabulary size — presumably reserved for padding and
# unknown tokens; TODO confirm against base.get_embedding_matrix / base.padd.
num_tokens = len(vocab) + 2
# Vector size of the GloVe file in use (glove.6B.300d.txt).
embedding_dim = 300

16152


In [11]:
# Build the embedding matrix for the vocabulary from the GloVe index; the recorded
# run converted 15775 words and missed 377.
embedding_matrix = base.get_embedding_matrix(num_tokens, embedding_dim, word_index, embeddings_index)

Converted 15775 words (377) misses


In [12]:
# Replace every token by its vocabulary index, sentence by sentence.
# A token that is missing from word_index raises KeyError.
train_data_index = [[word_index[token] for token in sentence] for sentence in train_data_tokens]
eval_data_index = [[word_index[token] for token in sentence] for sentence in eval_data_tokens]
test_data_index = [[word_index[token] for token in sentence] for sentence in test_data_tokens]

all_train_data_index = [[word_index[token] for token in sentence] for sentence in all_train_data_tokens]
test_data_blank_index = [[word_index[token] for token in sentence] for sentence in test_data_blank_tokens]

In [13]:
# Pass every index sequence through base.padd with a target length of 60.
train_padded_data = [base.padd(ids, 60) for ids in train_data_index]
eval_padded_data = [base.padd(ids, 60) for ids in eval_data_index]
test_padded_data = [base.padd(ids, 60) for ids in test_data_index]

all_train_padded_data = [base.padd(ids, 60) for ids in all_train_data_index]
test_blank_padded_data = [base.padd(ids, 60) for ids in test_data_blank_index]

In [14]:
# Attach the padded index sequences to each split as an "input_ids" column.
# NOTE(review): the results are bound back to the same names, so this cell is not
# idempotent — presumably re-running it without reloading the datasets fails
# because the column already exists; confirm.
train_data = train_data.add_column("input_ids", train_padded_data)
eval_data = eval_data.add_column("input_ids", eval_padded_data)
test_data = test_data.add_column("input_ids", test_padded_data)

all_train_data = all_train_data.add_column("input_ids", all_train_padded_data)
test_blank_data = test_blank_data.add_column("input_ids", test_blank_padded_data)

In [15]:
# Baseline BiLSTM classifier initialised with the GloVe embedding matrix.
model = base.BiLSTMClassifier(
    embedding_matrix=embedding_matrix,
    embedding_dim=embedding_dim,
    fc_dim=400,
    hidden_dim=300,
    output_dim=2,
)

In [16]:
# Show the layer structure of the model.
print(model)

BiLSTMClassifier(
  (embedding): Embedding(16154, 300)
  (lstm): LSTM(300, 300, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=600, out_features=400, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=400, out_features=2, bias=True)
)


In [17]:
# Training configuration for the baseline run (no distillation parameters).
training_args = base.get_training_args(
    output_dir="./results/bilstm-base",
    logging_dir='./logs/bilstm-base',
    lr=.001,
    epochs=10,
    batch_size=128,
)

In [18]:
# Reset the random seed(s) before training.
# NOTE(review): the model was instantiated before this call, so if reset_seed
# seeds the RNG used for weight initialisation, the initial weights are not
# covered by it — confirm.
base.reset_seed()

In [19]:
# Hugging Face Trainer for the baseline model: fits on train_data and reports
# base.compute_metrics on eval_data.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    compute_metrics=base.compute_metrics,
)

In [20]:
# Train the baseline; the per-epoch metrics are recorded in the table below.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3798,0.426829,0.801606,0.808496,0.802949,0.800925
2,0.2894,0.398621,0.834862,0.837653,0.834007,0.834227
3,0.2242,0.47413,0.821101,0.824624,0.820115,0.82025
4,0.1738,0.432823,0.84633,0.846639,0.846026,0.846172
5,0.1315,0.455008,0.849771,0.84973,0.849699,0.849714
6,0.0975,0.483489,0.845183,0.84766,0.844395,0.844652
7,0.0709,0.513457,0.855505,0.855577,0.85533,0.855413
8,0.0501,0.576581,0.857798,0.857776,0.857708,0.857738
9,0.0338,0.625796,0.863532,0.863513,0.863633,0.863518
10,0.0235,0.693838,0.862385,0.862537,0.862171,0.862281


TrainOutput(global_step=4210, training_loss=0.1474462518216312, metrics={'train_runtime': 85.686, 'train_samples_per_second': 6287.958, 'train_steps_per_second': 49.133, 'total_flos': 0.0, 'train_loss': 0.1474462518216312, 'epoch': 10.0})

In [21]:
# Put the model in evaluation mode (for a torch nn.Module this disables the
# Dropout layer shown in the repr below).
model.eval()

BiLSTMClassifier(
  (embedding): Embedding(16154, 300)
  (lstm): LSTM(300, 300, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=600, out_features=400, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=400, out_features=2, bias=True)
)

In [22]:
# Evaluate the baseline on test_data (loaded from ./data/sst2/test-logits).
# NOTE(review): the recorded accuracy here (~0.93) is well above the accuracy on
# eval_data (~0.86); verify that test_data does not overlap the training data.
trainer.evaluate(test_data)

{'eval_loss': 0.2900348901748657,
 'eval_accuracy': 0.9319227913882703,
 'eval_precision': 0.9301372804074213,
 'eval_recall': 0.9327142278224145,
 'eval_f1': 0.9312206262292437,
 'eval_runtime': 3.557,
 'eval_samples_per_second': 3786.927,
 'eval_steps_per_second': 29.801,
 'epoch': 10.0}

In [23]:
# Persist the baseline model's weights (state_dict only).
torch.save(model.state_dict(), "./models/sst2/bilstm-base.pth")

In [27]:
# Serve the blank test split as torch tensors on the device selected earlier in
# the notebook, rather than on a hard-coded "cuda", so the cell also works on
# CPU-only machines. Note that set_format changes test_blank_data in place.
test_blank_data.set_format(type='torch', columns=["input_ids"], device=device)
# shuffle=False keeps the logits in the same order as the examples.
test_blank_dataloader = DataLoader(test_blank_data, batch_size=128, shuffle=False)
# Logits of the baseline model for the blank test split.
test_blank_logits = base.generate_logits(test_blank_dataloader, model, images=False)

Generating logits for given dataset:   0%|          | 0/15 [00:00<?, ?it/s]

In [28]:
# Write the baseline logits to a TSV file meant for upload to the GLUE benchmark.
base.generate_real_test_file_sst2(test_blank_logits, "./data/sst2/bilstm-base-test.tsv")

Created output file named: ./data/sst2/bilstm-base-test.tsv upload it to GLUE benchmark to obtain results!


Reálné skóre na skutečné testovací části datasetu

![Real test score (GLUE Benchmark)](imgs/sst2_BiLSTM_base_score.png)

In [29]:
# Fresh BiLSTM with the same architecture as the baseline, to be trained with distillation.
student_model = base.BiLSTMClassifier(
    embedding_matrix=embedding_matrix,
    embedding_dim=embedding_dim,
    fc_dim=400,
    hidden_dim=300,
    output_dim=2,
)

In [30]:
# Training configuration for the distillation run.
# lambda_param and temp are forwarded to base.get_training_args — presumably the
# distillation loss weight and the softmax temperature; confirm in base.
training_args = base.get_training_args(
    output_dir="./results/bilstm-distill",
    remove_unused_columns=False,
    logging_dir='./logs/bilstm-distill',
    lr=.001,
    epochs=10,
    batch_size=128,
    lambda_param=.75,
    temp=5,
)

In [31]:
# Reset the random seed(s) before the distillation run.
# NOTE(review): student_model was instantiated before this call — confirm whether
# its weight initialisation is meant to be covered by the seed.
base.reset_seed()

In [32]:
# Distillation trainer from `base`, trained on train_data and evaluated on eval_data.
# Presumably it reads the teacher logits stored in the dataset (the args set
# remove_unused_columns=False) — TODO confirm in base.DistilTrainer.
trainer = base.DistilTrainer(
    student_model=student_model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    compute_metrics=base.compute_metrics,
)

In [33]:
# Train the distilled student; the per-epoch metrics are recorded in the table below.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.1942,2.056107,0.795872,0.805212,0.797445,0.794834
2,1.4561,1.604402,0.847477,0.847628,0.847699,0.847475
3,1.0296,1.642356,0.834862,0.835508,0.834428,0.834611
4,0.7358,1.732866,0.825688,0.828375,0.82483,0.825017
5,0.5512,1.62354,0.849771,0.849713,0.849741,0.849726
6,0.4141,1.686715,0.856651,0.856758,0.856456,0.856552
7,0.3246,1.657303,0.853211,0.853162,0.853162,0.853162
8,0.2525,1.562813,0.862385,0.862339,0.862339,0.862339
9,0.2031,1.620576,0.858945,0.858887,0.858961,0.858914
10,0.1701,1.621368,0.861239,0.861237,0.861129,0.861173


TrainOutput(global_step=4210, training_loss=0.7331444420893798, metrics={'train_runtime': 95.1036, 'train_samples_per_second': 5665.298, 'train_steps_per_second': 44.268, 'total_flos': 0.0, 'train_loss': 0.7331444420893798, 'epoch': 10.0})

In [34]:
# Put the student in evaluation mode (for a torch nn.Module this disables the
# Dropout layer shown in the repr below).
student_model.eval()

BiLSTMClassifier(
  (embedding): Embedding(16154, 300)
  (lstm): LSTM(300, 300, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=600, out_features=400, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=400, out_features=2, bias=True)
)

In [35]:
# Evaluate the distilled student on test_data.
trainer.evaluate(test_data)

{'eval_loss': 0.6320107579231262,
 'eval_accuracy': 0.9423162583518931,
 'eval_precision': 0.9409631500876571,
 'eval_recall': 0.9424131145032519,
 'eval_f1': 0.9416333718684016,
 'eval_runtime': 3.6661,
 'eval_samples_per_second': 3674.155,
 'eval_steps_per_second': 28.913,
 'epoch': 10.0}

In [36]:
# Persist the distilled student's weights (state_dict only).
torch.save(student_model.state_dict(), "./models/sst2/bilstm-distill.pth")

In [37]:
# Reuse the blank-test DataLoader built earlier to get the student's logits,
# then write them to a TSV file meant for upload to the GLUE benchmark.
test_blank_logits = base.generate_logits(test_blank_dataloader, student_model, images=False)
base.generate_real_test_file_sst2(test_blank_logits, "./data/sst2/bilstm-distill-test.tsv")

Generating logits for given dataset:   0%|          | 0/15 [00:00<?, ?it/s]

Created output file named: ./data/sst2/bilstm-distill-test.tsv upload it to GLUE benchmark to obtain results!


Reálné skóre na skutečné testovací části datasetu

![Real test score (GLUE Benchmark)](imgs/sst2_BiLSTM_distill_score.png)

In [38]:
# New baseline BiLSTM (same architecture) for training on the augmented data.
# This rebinds `model`, replacing the baseline trained earlier.
model = base.BiLSTMClassifier(
    embedding_matrix=embedding_matrix,
    embedding_dim=embedding_dim,
    fc_dim=400,
    hidden_dim=300,
    output_dim=2,
)

In [39]:
# Training configuration for the baseline run on the augmented training set.
training_args = base.get_training_args(
    output_dir="./results/bilstm-base-aug",
    logging_dir='./logs/bilstm-base-aug',
    lr=.001,
    epochs=10,
    batch_size=128,
)

In [40]:
# Reset the random seed(s) before the augmented baseline run.
# NOTE(review): the model was instantiated before this call — confirm whether its
# weight initialisation is meant to be covered by the seed.
base.reset_seed()

In [41]:
# Hugging Face Trainer for the baseline on the augmented set: fits on
# all_train_data and reports base.compute_metrics on eval_data.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=all_train_data,
    eval_dataset=eval_data,
    compute_metrics=base.compute_metrics,
)

In [42]:
# Train the baseline on the augmented set.
# NOTE(review): in the recorded run the validation loss rises every epoch
# (0.45 -> 1.95) while the training loss approaches zero; consider fewer epochs
# or early stopping.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.209,0.454815,0.855505,0.85696,0.854909,0.855169
2,0.0748,0.579263,0.854358,0.855832,0.854961,0.854315
3,0.0408,0.658038,0.847477,0.848728,0.848036,0.847443
4,0.026,0.930178,0.844037,0.844164,0.843816,0.843918
5,0.0176,1.166061,0.83945,0.840988,0.840069,0.839395
6,0.0123,1.179484,0.841743,0.841856,0.841942,0.84174
7,0.0085,1.307335,0.850917,0.850863,0.850951,0.850889
8,0.0059,1.560086,0.850917,0.851204,0.851204,0.850917
9,0.0041,1.780422,0.857798,0.857744,0.857834,0.857771
10,0.003,1.952711,0.852064,0.852005,0.852078,0.852031


TrainOutput(global_step=41690, training_loss=0.040209468224832665, metrics={'train_runtime': 587.7082, 'train_samples_per_second': 9079.846, 'train_steps_per_second': 70.937, 'total_flos': 0.0, 'train_loss': 0.040209468224832665, 'epoch': 10.0})

In [43]:
# Put the model in evaluation mode (for a torch nn.Module this disables the
# Dropout layer shown in the repr below).
model.eval()

BiLSTMClassifier(
  (embedding): Embedding(16154, 300)
  (lstm): LSTM(300, 300, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=600, out_features=400, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=400, out_features=2, bias=True)
)

In [44]:
# Evaluate the augmented-data baseline on test_data.
trainer.evaluate(test_data)

{'eval_loss': 0.7202707529067993,
 'eval_accuracy': 0.9443207126948775,
 'eval_precision': 0.9429466041025756,
 'eval_recall': 0.9445230778526156,
 'eval_f1': 0.9436697689397209,
 'eval_runtime': 3.5755,
 'eval_samples_per_second': 3767.253,
 'eval_steps_per_second': 29.646,
 'epoch': 10.0}

In [45]:
# Persist the augmented-data baseline's weights (state_dict only).
torch.save(model.state_dict(), "./models/sst2/bilstm-base-aug.pth")

In [46]:
# Reuse the blank-test DataLoader built earlier to get the model's logits,
# then write them to a TSV file meant for upload to the GLUE benchmark.
test_blank_logits = base.generate_logits(test_blank_dataloader, model, images=False)
base.generate_real_test_file_sst2(test_blank_logits, "./data/sst2/bilstm-base-aug-test.tsv")

Generating logits for given dataset:   0%|          | 0/15 [00:00<?, ?it/s]

Created output file named: ./data/sst2/bilstm-base-aug-test.tsv upload it to GLUE benchmark to obtain results!


Reálné skóre na skutečné testovací části datasetu

![Real test score (GLUE Benchmark)](imgs/sst2_BiLSTM_base_aug_score.png)

In [47]:
# Fresh student BiLSTM (same architecture) for distillation on the augmented data.
# This rebinds `student_model`, replacing the student trained earlier.
student_model = base.BiLSTMClassifier(
    embedding_matrix=embedding_matrix,
    embedding_dim=embedding_dim,
    fc_dim=400,
    hidden_dim=300,
    output_dim=2,
)

In [48]:
# Training configuration for the distillation run on the augmented training set.
# lambda_param and temp are forwarded to base.get_training_args — presumably the
# distillation loss weight and the softmax temperature; confirm in base.
training_args = base.get_training_args(
    output_dir="./results/bilstm-distill-aug",
    remove_unused_columns=False,
    logging_dir='./logs/bilstm-distill-aug',
    lr=.001,
    epochs=10,
    batch_size=128,
    lambda_param=.75,
    temp=5,
)

In [49]:
# Reset the random seed(s) before the augmented distillation run.
# NOTE(review): student_model was instantiated before this call — confirm whether
# its weight initialisation is meant to be covered by the seed.
base.reset_seed()

In [50]:
# Distillation trainer from `base`, trained on all_train_data and evaluated on eval_data.
# Presumably it reads the teacher logits stored in the dataset (the args set
# remove_unused_columns=False) — TODO confirm in base.DistilTrainer.
trainer = base.DistilTrainer(
    student_model=student_model,
    args=training_args,
    train_dataset=all_train_data,
    eval_dataset=eval_data,
    compute_metrics=base.compute_metrics,
)

In [51]:
# Train the distilled student on the augmented set; the per-epoch metrics are
# recorded in the table below.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8418,1.224699,0.869266,0.870204,0.868801,0.869043
2,0.2562,1.109718,0.875,0.87497,0.874937,0.874952
3,0.1555,1.065162,0.881881,0.882461,0.881525,0.881731
4,0.1142,1.024786,0.875,0.875213,0.874768,0.874897
5,0.0915,0.99599,0.883028,0.883881,0.883493,0.883018
6,0.0765,0.952517,0.87844,0.878707,0.878189,0.878332
7,0.0655,0.920159,0.887615,0.887562,0.887619,0.887586
8,0.0573,0.90783,0.885321,0.885313,0.88524,0.885272
9,0.051,0.903743,0.887615,0.8878,0.887408,0.887529
10,0.0462,0.88917,0.885321,0.885424,0.885156,0.885248


TrainOutput(global_step=41690, training_loss=0.17556492912308952, metrics={'train_runtime': 653.7124, 'train_samples_per_second': 8163.07, 'train_steps_per_second': 63.774, 'total_flos': 0.0, 'train_loss': 0.17556492912308952, 'epoch': 10.0})

In [52]:
# Put the student in evaluation mode (for a torch nn.Module this disables the
# Dropout layer shown in the repr below).
student_model.eval()

BiLSTMClassifier(
  (embedding): Embedding(16154, 300)
  (lstm): LSTM(300, 300, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=600, out_features=400, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=400, out_features=2, bias=True)
)

In [53]:
# Evaluate the augmented-data distilled student on test_data.
trainer.evaluate(test_data)

{'eval_loss': 0.33813270926475525,
 'eval_accuracy': 0.9596139569413511,
 'eval_precision': 0.9584960659013655,
 'eval_recall': 0.9598496283543609,
 'eval_f1': 0.9591285149534032,
 'eval_runtime': 3.6436,
 'eval_samples_per_second': 3696.876,
 'eval_steps_per_second': 29.092,
 'epoch': 10.0}

In [54]:
# Persist the weights of the distilled student trained on the augmented set.
# The object saved must be `student_model`: `model` still refers to the
# augmented-data baseline, whose weights are stored in bilstm-base-aug.pth.
torch.save(student_model.state_dict(), "./models/sst2/bilstm-distill-aug.pth")

In [56]:
# Reuse the blank-test DataLoader built earlier to get the student's logits,
# then write them to a TSV file meant for upload to the GLUE benchmark.
test_blank_logits = base.generate_logits(test_blank_dataloader, student_model, images=False)
base.generate_real_test_file_sst2(test_blank_logits, "./data/sst2/bilstm-distill-aug-test.tsv")

Generating logits for given dataset:   0%|          | 0/15 [00:00<?, ?it/s]

Created output file named: ./data/sst2/bilstm-distill-aug-test.tsv upload it to GLUE benchmark to obtain results!


Reálné skóre na skutečné testovací části datasetu

![Real test score (GLUE Benchmark)](imgs/sst2_BiLSTM_distill_aug_score.png)

Skóre učitelského modelu na skutečné testovací části datasetu

![SST2 test score for best BERT model](imgs/sst2_BERT_test_score.png)