In [1]:
import torch
from transformers import (AutoTokenizer, AutoConfig,
AutoModelForSequenceClassification)
from transformers import Trainer, TrainingArguments
from datasets import Dataset, DatasetDict
import numpy as np
import pandas as pd
from scipy.special import expit as sigmoid
from sklearn.metrics import classification_report

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('processed-files/df.csv')

In [3]:
labels = np.load('processed-files/new_labels.npy', allow_pickle = True)
labels = np.unique(labels, return_inverse=True)[1]
df['label'] = torch.tensor(labels, dtype=torch.float)

In [4]:
df = df.sample(frac=0.1, random_state=42)

In [5]:
df_test = df.sample(frac=0.75, random_state=42)
df_train = df.drop(df_test.index).sample(frac=0.95, random_state=42)
df_valid = df.drop(df_test.index).drop(df_train.index)

ds = DatasetDict({
"train": Dataset.from_pandas(df_train.reset_index(drop=True)),
"valid": Dataset.from_pandas(df_valid.reset_index(drop=True)),
"test": Dataset.from_pandas(df_test.reset_index(drop=True))})

In [6]:
model_ckpt = "sentence-transformers/paraphrase-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
def tokenize(batch):
    return tokenizer(batch["Sentence"], padding='max_length', truncation=True)

In [7]:
ds_enc = ds.map(tokenize, batched=True)
ds_enc = ds_enc.remove_columns(['Word A', 'Word B', 'Relation', 'Sentence'])

100%|██████████| 35/35 [00:04<00:00,  7.25ba/s]
100%|██████████| 2/2 [00:00<00:00,  8.11ba/s]
100%|██████████| 109/109 [00:15<00:00,  7.13ba/s]


In [8]:
training_args = TrainingArguments(output_dir="trainer", evaluation_strategy="epoch")

In [9]:
# from datasets import load_metric
# metric = load_metric("hinge_loss")

def compute_metrics(eval_pred):
    # hinge loss
    logits, labels = eval_pred
    logits = logits[:, 1] - logits[:, 0]
    loss = np.mean(np.maximum(0, 1 - labels * logits))
    # accuracy
    preds = np.where(logits < 0, 0, 1)
    acc = (preds == labels).mean()
    return {
        "hinge_loss": loss,
        "accuracy": acc,
    }
    


In [10]:
config = AutoConfig.from_pretrained(model_ckpt)
config.num_labels = 1
model = AutoModelForSequenceClassification.from_pretrained('sentence-transformers/paraphrase-MiniLM-L6-v2',
config=config)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=ds_enc["train"],
    eval_dataset=ds_enc["valid"],
    compute_metrics=compute_metrics,
)


trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-MiniLM-L6-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
***** Running training *****
  Num examples = 34206
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 12828
  0%|          | 35/12828 [01:36<10:46:13,  3.03s/it]

KeyboardInterrupt: 

In [None]:
# evaluate the model on the test set
trainer.evaluate(ds_enc["test"])

***** Running Evaluation *****
  Num examples = 10802
  Batch size = 8
100%|██████████| 1351/1351 [19:00<00:00,  1.18it/s]


{'eval_loss': 52.22462463378906,
 'eval_accuracy': 0.13728939085354563,
 'eval_runtime': 1141.2473,
 'eval_samples_per_second': 9.465,
 'eval_steps_per_second': 1.184,
 'epoch': 3.0}