In [1]:
import time, sys
import numpy as np

import torch

from classifier_for_training import Classifier

In [None]:
def set_reproducible():
    """Put every random number generator used by the notebook in a fixed state.

    Seeds core Python's ``random`` module, NumPy's legacy global generator and
    PyTorch's generators, and exports ``PYTHONHASHSEED`` to the environment.
    Takes no arguments and returns nothing.
    """
    import random as rn
    import os
    # NOTE(review): PYTHONHASHSEED is read by the interpreter at start-up, so
    # assigning it here is only inherited by child processes; to fix string
    # hashing in this kernel it must be set before the kernel is launched.
    os.environ['PYTHONHASHSEED'] = '0'
    # Start NumPy generated random numbers in a well-defined initial state.
    np.random.seed(17)
    # Start core Python generated random numbers in a well-defined state.
    rn.seed(12345)
    # The models trained in this notebook are torch models; without this call
    # torch-side randomness stays unseeded.
    torch.manual_seed(17)

def load_label_output(filename):
    """Return the first tab-separated field of every non-blank line of a file.

    The file is read as UTF-8. Each line is stripped of surrounding
    whitespace; lines that are empty after stripping are skipped.
    """
    labels = []
    with open(filename, 'r', encoding='UTF-8') as handle:
        for raw_line in handle:
            content = raw_line.strip()
            if not content:
                continue
            # Only the leading column is kept.
            labels.append(content.split("\t")[0])
    return labels

def eval_list(glabels, slabels):
    """Compute the accuracy, in percent, of system labels against gold labels.

    Args:
        glabels: sequence of gold labels.
        slabels: sequence of system (predicted) labels.

    Returns:
        The percentage (0-100) of positions at which both sequences agree,
        computed over the first ``min(len(glabels), len(slabels))`` items.
        Returns 0.0 when there is nothing to compare.

    A warning is printed when the two sequences differ in length.
    """
    if len(glabels) != len(slabels):
        print("\nWARNING: label count in system output (%d) is different from gold label count (%d)\n" % (
        len(slabels), len(glabels)))
    n = min(len(slabels), len(glabels))
    if n == 0:
        # Nothing to compare: avoid dividing by zero.
        return 0.0
    # zip stops at the shorter sequence, i.e. after exactly n pairs.
    correct_count = sum(1 for gold, pred in zip(glabels, slabels) if gold == pred)
    return correct_count / n * 100

def train_and_eval(classifier, trainfile, devfile, testfile, run_id, device):
    """Train a classifier and report its accuracy on the dev (and test) data.

    Args:
        classifier: object providing ``train(trainfile, devfile, device)`` and
            ``predict(datafile, device)``; the predictions are compared with
            the labels read by ``load_label_output``.
        trainfile: path of the training data file.
        devfile: path of the development data file.
        testfile: path of the test data file, or ``None`` to skip test
            evaluation.
        run_id: identifier of the run, used only in the progress messages.
        device: device handed to the classifier for training and prediction.

    Returns:
        A ``(devacc, testacc)`` tuple of accuracies in percent; ``testacc`` is
        ``-1`` when ``testfile`` is ``None``.
    """
    print(f"\nRUN: {run_id}")
    print("  %s.1. Training the classifier..." % str(run_id))
    classifier.train(trainfile, devfile, device)
    print("  %s.2. Eval on the dev set..." % str(run_id), end="")
    slabels = classifier.predict(devfile, device)
    glabels = load_label_output(devfile)
    devacc = eval_list(glabels, slabels)
    print(" Acc.: %.2f" % devacc)
    testacc = -1
    if testfile is not None:
        # Evaluation on the test data
        print("  %s.3. Eval on the test set..." % str(run_id), end="")
        # Pass the device here as well, exactly as for the dev set prediction.
        slabels = classifier.predict(testfile, device)
        glabels = load_label_output(testfile)
        testacc = eval_list(glabels, slabels)
        print(" Acc.: %.2f" % testacc)
    print()
    return (devacc, testacc)

---
# BERT

In [None]:
# Experiment settings: number of independent runs and GPU index (None -> CPU).
n_runs = 5
gpu = None

# Resolve the torch device from the GPU setting, then fix the random seeds.
device_name = f"cuda:{gpu}" if gpu is not None else "cpu"
device = torch.device(device_name)
set_reproducible()

# Data locations; no test file is evaluated in this experiment.
datadir = "../data/"
trainfile = f"{datadir}traindata.csv"
devfile = f"{datadir}devdata.csv"
testfile = None

# Train a fresh classifier per run and collect the rounded accuracies.
start_time = time.perf_counter()
devaccs, testaccs = [], []
for i in range(1, n_runs + 1):
    classifier = Classifier(model_name='bert-base-uncased')
    devacc, testacc = train_and_eval(
        classifier, trainfile, devfile, testfile, i, device
    )
    devaccs.append(np.round(devacc, 2))
    testaccs.append(np.round(testacc, 2))

# Summary: per-run dev accuracies, mean (std) and timing.
print(f'\nCompleted {n_runs} runs.')
total_exec_time = time.perf_counter() - start_time
print("Dev accs:", devaccs)
print(f"Mean Dev Acc.: {np.mean(devaccs):.2f} ({np.std(devaccs):.2f})")
print(f"\nExec time: {total_exec_time:.2f} s. ( {int(total_exec_time / n_runs)} per run )")

---
# DistilBERT

In [12]:
# Experiment settings: number of independent runs and GPU index (None -> CPU).
n_runs = 5
gpu = None

# Resolve the torch device from the GPU setting, then fix the random seeds.
device_name = f"cuda:{gpu}" if gpu is not None else "cpu"
device = torch.device(device_name)
set_reproducible()

# Data locations; no test file is evaluated in this experiment.
datadir = "../data/"
trainfile = f"{datadir}traindata.csv"
devfile = f"{datadir}devdata.csv"
testfile = None

# Train a fresh classifier per run and collect the rounded accuracies.
start_time = time.perf_counter()
devaccs, testaccs = [], []
for i in range(1, n_runs + 1):
    classifier = Classifier(model_name='distilbert-base-uncased')
    devacc, testacc = train_and_eval(
        classifier, trainfile, devfile, testfile, i, device
    )
    devaccs.append(np.round(devacc, 2))
    testaccs.append(np.round(testacc, 2))

# Summary: per-run dev accuracies, mean (std) and timing.
print(f'\nCompleted {n_runs} runs.')
total_exec_time = time.perf_counter() - start_time
print("Dev accs:", devaccs)
print(f"Mean Dev Acc.: {np.mean(devaccs):.2f} ({np.std(devaccs):.2f})")
print(f"\nExec time: {total_exec_time:.2f} s. ( {int(total_exec_time / n_runs)} per run )")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 1
  1.1. Training the classifier...
  1.2. Eval on the dev set... Acc.: 59.04



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 2
  2.1. Training the classifier...
  2.2. Eval on the dev set... Acc.: 54.52



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 3
  3.1. Training the classifier...
  3.2. Eval on the dev set... Acc.: 56.38



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 4
  4.1. Training the classifier...
  4.2. Eval on the dev set... Acc.: 56.12



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 5
  5.1. Training the classifier...
  5.2. Eval on the dev set... Acc.: 55.85


Completed 5 runs.
Dev accs: [59.04, 54.52, 56.38, 56.12, 55.85]
Mean Dev Acc.: 56.38 (1.48)

Exec time: 1559.93 s. ( 311 per run )


---
# ELECTRA Small Discriminator

In [9]:
# Experiment settings: number of independent runs and GPU index (None -> CPU).
n_runs = 5
gpu = None

# Resolve the torch device from the GPU setting, then fix the random seeds.
device_name = f"cuda:{gpu}" if gpu is not None else "cpu"
device = torch.device(device_name)
set_reproducible()

# Data locations; no test file is evaluated in this experiment.
datadir = "../data/"
trainfile = f"{datadir}traindata.csv"
devfile = f"{datadir}devdata.csv"
testfile = None

# Train a fresh classifier per run and collect the rounded accuracies.
start_time = time.perf_counter()
devaccs, testaccs = [], []
for i in range(1, n_runs + 1):
    classifier = Classifier(model_name='google/electra-small-discriminator')
    devacc, testacc = train_and_eval(
        classifier, trainfile, devfile, testfile, i, device
    )
    devaccs.append(np.round(devacc, 2))
    testaccs.append(np.round(testacc, 2))

# Summary: per-run dev accuracies, mean (std) and timing.
print(f'\nCompleted {n_runs} runs.')
total_exec_time = time.perf_counter() - start_time
print("Dev accs:", devaccs)
print(f"Mean Dev Acc.: {np.mean(devaccs):.2f} ({np.std(devaccs):.2f})")
print(f"\nExec time: {total_exec_time:.2f} s. ( {int(total_exec_time / n_runs)} per run )")

Downloading vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/54.2M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 1
  1.1. Training the classifier...
  1.2. Eval on the dev set... Acc.: 57.98



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 2
  2.1. Training the classifier...
  2.2. Eval on the dev set... Acc.: 55.32



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 3
  3.1. Training the classifier...
  3.2. Eval on the dev set... Acc.: 59.84



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 4
  4.1. Training the classifier...
  4.2. Eval on the dev set... Acc.: 59.57



Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 5
  5.1. Training the classifier...
  5.2. Eval on the dev set... Acc.: 52.13


Completed 5 runs.
Dev accs: [57.98, 55.32, 59.84, 59.57, 52.13]
Mean Dev Acc.: 56.97 (2.90)

Exec time: 654.47 s. ( 130 per run )


---
# RoBERTa

In [14]:
# Experiment settings: number of independent runs and GPU index (None -> CPU).
n_runs = 5
gpu = None

# Resolve the torch device from the GPU setting, then fix the random seeds.
device_name = f"cuda:{gpu}" if gpu is not None else "cpu"
device = torch.device(device_name)
set_reproducible()

# Data locations; no test file is evaluated in this experiment.
datadir = "../data/"
trainfile = f"{datadir}traindata.csv"
devfile = f"{datadir}devdata.csv"
testfile = None

# Train a fresh classifier per run and collect the rounded accuracies.
start_time = time.perf_counter()
devaccs, testaccs = [], []
for i in range(1, n_runs + 1):
    classifier = Classifier(model_name='roberta-base')
    devacc, testacc = train_and_eval(
        classifier, trainfile, devfile, testfile, i, device
    )
    devaccs.append(np.round(devacc, 2))
    testaccs.append(np.round(testacc, 2))

# Summary: per-run dev accuracies, mean (std) and timing.
print(f'\nCompleted {n_runs} runs.')
total_exec_time = time.perf_counter() - start_time
print("Dev accs:", devaccs)
print(f"Mean Dev Acc.: {np.mean(devaccs):.2f} ({np.std(devaccs):.2f})")
print(f"\nExec time: {total_exec_time:.2f} s. ( {int(total_exec_time / n_runs)} per run )")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 1
  1.1. Training the classifier...
  1.2. Eval on the dev set... Acc.: 59.04



Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 2
  2.1. Training the classifier...
  2.2. Eval on the dev set... Acc.: 59.04



Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 3
  3.1. Training the classifier...
  3.2. Eval on the dev set... Acc.: 58.51



Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 4
  4.1. Training the classifier...
  4.2. Eval on the dev set... Acc.: 58.24



Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



RUN: 5
  5.1. Training the classifier...


KeyboardInterrupt: 