In [2]:
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModelForMaskedLM, set_seed
from sklearn import svm
from sklearn import metrics
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
dataset = load_dataset("GroNLP/dutch-cola")

In [4]:
train_df = pd.DataFrame.from_dict(dataset["train"])
dev_df = pd.DataFrame.from_dict(dataset["validation"])
test_df = pd.DataFrame.from_dict(dataset["test"])


In [5]:
train_df

Unnamed: 0,Source,Original ID,Acceptability,Original annotation,Sentence,Material added
0,SoD-Noun2,4.1a,1,,Ik geef een paar voorbeelden.,1
1,SoD-Noun2,4.1b,1,,Ik geef twee voorbeelden.,1
2,SoD-Noun2,4.1b,1,,Ik geef enkele voorbeelden.,1
3,SoD-Noun2,4.3a,1,,Ik heb een paar schoenen.,1
4,SoD-Noun2,4.3b,1,,Ik zag het paar schoenen.,1
...,...,...,...,...,...,...
19888,SoD-Zw,5.1,1,,Hij ging naar de prachtige eilanden.,0
19889,SoD-Zw,5.2a,1,,Hij wilde naar die prachtige eilanden.,1
19890,SoD-Zw,5.2a,1,,Ze wees naar deze prachtige eilanden.,1
19891,SoD-Zw,5.2b,1,,Ze gingen naar hun prachtige eilanden.,0


In [6]:
# Initiate Model
model_name = 'GroNLP/bert-base-dutch-cased' # or other model if preferred
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)

Some weights of BertModel were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
train_df = train_df.sample(frac=1, random_state=42) # shuffle train data

In [7]:
# Create embeddings of train data
train_cls_embeddings = {
  1: [],
  2: [],
  3: [],
  4: [],
  5: [],
  6: [],
  7: [],
  8: [],
  9: [],
  10: [],
  11: [],
  12: []
}
for line in tqdm(train_df["Sentence"]):
  tokenized_text = tokenizer(line, return_tensors="pt")
  with torch.no_grad():
    line_embedding = model(**tokenized_text, output_hidden_states=True) # extract embedding for sentence

  for i in range(1,13):
    train_cls_embeddings[i].append(line_embedding.hidden_states[i][:, -1, :]) # store embedding

100%|██████████| 19893/19893 [18:16<00:00, 18.13it/s]


In [8]:
# Create embeddings of test data
test_cls_embeddings = {
  1: [],
  2: [],
  3: [],
  4: [],
  5: [],
  6: [],
  7: [],
  8: [],
  9: [],
  10: [],
  11: [],
  12: []
}
for line in tqdm(test_df["Sentence"]):
  tokenized_text = tokenizer(line, return_tensors="pt")
  with torch.no_grad():
    line_embedding = model(**tokenized_text, output_hidden_states=True) # extract embedding for sentence

  for i in range(1,13):
    test_cls_embeddings[i].append(line_embedding.hidden_states[i][:, -1, :]) # store embedding

100%|██████████| 2400/2400 [02:13<00:00, 18.01it/s]


In [9]:
for layer in range(1,13):
  print(layer)
  print("===============")

  # Create numpy objects of embeddings
  X_train = [tensor.numpy() for tensor in train_cls_embeddings[layer]]
  X_test = [tensor.numpy() for tensor in test_cls_embeddings[layer]] # set to dev for testing accuracy

  # extract labels from CoLA
  y_train = train_df['Acceptability']
  y_test = test_df['Acceptability']

  # initiate SVC model
  clf = svm.SVC(kernel='linear')

  # train the model
  clf.fit(X_train, y_train)

  print("Model trained.")

  # predict labels
  y_pred = clf.predict(X_test)

  ac_score = metrics.accuracy_score(y_test, y_pred)
  rc_score = metrics.recall_score(y_test, y_pred)
  pr_score = metrics.precision_score(y_test, y_pred)
  f1_score = metrics.f1_score(y_test, y_pred)

  print("---------------")
  print(f"Accuracy: {ac_score:.3f}\nRecall: {rc_score:.3f}\nPrecision: {pr_score:.3f}\nF1: {f1_score:.3f}")
  print(f"{ac_score:.3f}, {rc_score:.3f}, {pr_score:.3f}, {f1_score:.3f}")
  print("===============")



    

10
Model trained.
---------------
Accuracy: 0.662
Recall: 0.839
Precision: 0.620
F1: 0.713
0.662, 0.839, 0.620, 0.713
11
Model trained.
---------------
Accuracy: 0.674
Recall: 0.849
Precision: 0.629
F1: 0.722
0.674, 0.849, 0.629, 0.722
12
Model trained.
---------------
Accuracy: 0.674
Recall: 0.848
Precision: 0.629
F1: 0.722
0.674, 0.848, 0.629, 0.722
