### Import Libraries

In [1]:
!pip install transformers
!pip install tensorboardx
!pip install simpletransformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m67.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m106.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1
Looking in indexes: https://pypi.org/simple, https:/

In [2]:
import pandas as pd
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import sklearn
from sklearn.metrics import accuracy_score

### Import dataset

In [3]:
train_dataset = pd.read_csv('./ILDC/ILDC_single/train_dataset.csv')
dev_dataset = pd.read_csv('./ILDC/ILDC_single/dev_dataset.csv')
test_dataset = pd.read_csv('./ILDC/ILDC_single/test_dataset.csv')
print(f'Train Dataset: {train_dataset.shape}')
print(f'Dev Dataset: {dev_dataset.shape}')
print(f'Test Dataset: {test_dataset.shape}')

Train Dataset: (5082, 2)
Dev Dataset: (994, 2)
Test Dataset: (1517, 2)


In [4]:
texts = []
for text in train_dataset['text']:
    texts.append(' '.join(text.split(' ')[-500:]))
train_dataset['text'] = texts
texts = []
for text in test_dataset['text']:
    texts.append(' '.join(text.split(' ')[-500:]))
test_dataset['text'] = texts

### Train Model (RoBERTa)




In [5]:
model_args = ClassificationArgs()
model_args.num_train_epochs = 5
model_args.learning_rate = 1e-5
model_args.overwrite_output_dir = True

In [6]:
model = ClassificationModel('roberta', 'roberta-base', num_labels=2, args = model_args)

Downloading (…)lve/main/config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [7]:
model.train_model(train_dataset)



  0%|          | 0/5082 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

(3180, 0.6097961275832459)

In [8]:
result, model_outputs, wrong_predictions = model.eval_model(test_dataset, acc = accuracy_score)
result



  0%|          | 0/1517 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/190 [00:00<?, ?it/s]

{'mcc': 0.086109033435398,
 'tp': 320,
 'tn': 501,
 'fp': 254,
 'fn': 442,
 'auroc': 0.5736802767203769,
 'auprc': 0.5831571823318277,
 'acc': 0.5411997363216875,
 'eval_loss': 0.8624204074081622}

### Train Model (bert-base-uncased)

In [9]:
model_args = ClassificationArgs()
model_args.num_train_epochs = 5
model_args.learning_rate = 1e-5
model_args.overwrite_output_dir = True

In [10]:
model = ClassificationModel('bert', 'bert-base-uncased', num_labels=2, args = model_args)

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [11]:
model.train_model(train_dataset)



  0%|          | 0/5082 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

(3180, 0.5777484593901244)

In [12]:
result, model_outputs, wrong_predictions = model.eval_model(test_dataset, acc = accuracy_score)
result



  0%|          | 0/1517 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/190 [00:00<?, ?it/s]

{'mcc': 0.06349393237272088,
 'tp': 128,
 'tn': 662,
 'fp': 93,
 'fn': 634,
 'auroc': 0.5637491091759226,
 'auprc': 0.5500329868788982,
 'acc': 0.5207646671061306,
 'eval_loss': 0.9968703702876442}

### Train Model (legal-bert-base-uncased)




In [13]:
model_args = ClassificationArgs()
model_args.num_train_epochs = 5
model_args.learning_rate = 1e-5
model_args.overwrite_output_dir = True

In [14]:
model = ClassificationModel('bert', 'nlpaueb/legal-bert-base-uncased', num_labels=2, args = model_args)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at nlpaueb/legal-bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification wer

Downloading (…)okenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/222k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [15]:
model.train_model(train_dataset)



  0%|          | 0/5082 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/636 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

(3180, 0.5682440187946056)

In [16]:
result, model_outputs, wrong_predictions = model.eval_model(test_dataset, acc = accuracy_score)
result



  0%|          | 0/1517 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/190 [00:00<?, ?it/s]

{'mcc': 0.13426757564125727,
 'tp': 289,
 'tn': 563,
 'fp': 192,
 'fn': 473,
 'auroc': 0.6008134744746311,
 'auprc': 0.5938960727671758,
 'acc': 0.5616348055372445,
 'eval_loss': 0.876118655894932}

### Train Model (saibo/legal-roberta-base)

In [17]:
model_args = ClassificationArgs()

model_args.num_train_epochs = 5
model_args.learning_rate = 1e-5
model_args.overwrite_output_dir = True

In [18]:
model = ClassificationModel('roberta', 'saibo/legal-roberta-base', num_labels=2, args = model_args)

Downloading (…)lve/main/config.json:   0%|          | 0.00/578 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of the model checkpoint at saibo/legal-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at saibo/legal-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.wei

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

In [19]:
model.train_model(train_dataset)



  0%|          | 0/5082 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/636 [00:00<?, ?it/s]

(3180, 0.5994749567043857)

In [20]:
result, model_outputs, wrong_predictions = model.eval_model(test_dataset, acc = accuracy_score)
result



  0%|          | 0/1517 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/190 [00:00<?, ?it/s]

{'mcc': 0.12517373975758742,
 'tp': 136,
 'tn': 685,
 'fp': 70,
 'fn': 626,
 'auroc': 0.5849324711894457,
 'auprc': 0.5939263006390844,
 'acc': 0.5411997363216875,
 'eval_loss': 0.9848184685958059}