# Simple Transformers - Multi-class Classification 

### Imports

In [8]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import pandas as pd
import logging

  from .autonotebook import tqdm as notebook_tqdm


### Configure Logging

In [9]:
# Cap the `transformers` logger at WARNING, then configure the root logger at INFO
# so that every other logger reports informational messages.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)

## Preparing Data

### Train Data

In [10]:
# Toy three-class training set: every row is [sentence, integer class id].
train_data = [
    ["Aragorn was the heir of Isildur", 1],
    ["Frodo was the heir of Isildur", 0],
    ["Pippin is stronger than Merry", 2],
]
# Name the two columns `text` and `labels` directly in the constructor.
train_df = pd.DataFrame(train_data, columns=["text", "labels"])

### Eval Data

In [11]:
# Toy three-class evaluation set: every row is [sentence, integer class id].
# The third sentence previously read "Merrry"; it now uses the same spelling
# ("Merry") as the training sentences.
eval_data = [
    ["Aragorn was the heir of Elendil", 1],
    ["Sam was the heir of Isildur", 0],
    ["Merry is stronger than Pippin", 2],
]
# Two columns, `text` and `labels`, mirroring the training frame.
eval_df = pd.DataFrame(eval_data)
eval_df.columns = ["text", "labels"]

## Model

### Model Configuration

In [13]:
# Train for a single epoch. `overwrite_output_dir=True` allows training to write
# into an output directory that already holds files from an earlier run, so the
# notebook can be re-run from a fresh kernel without manual clean-up.
model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir=True)

In [15]:
# BERT classifier built on the `bert-base-cased` checkpoint with a three-label
# head, configured by `model_args` and kept on the CPU (`use_cuda=False`).
model = ClassificationModel(
    "bert",
    "bert-base-cased",
    args=model_args,
    num_labels=3,
    use_cuda=False,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
tokenizer_config.json: 100%|██████████| 49.0/49.0 [00:00<00:00, 21.0kB/s]


## Train Model

In [16]:
# Fine-tune the model on the training frame. The value displayed for this cell is
# a 2-tuple; presumably (global step, training loss) — confirm against the
# library documentation.
model.train_model(train_df)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
0it [00:00, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
1it [00:07,  7.97s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_bert_128_3_2
Epochs 1/1. Running Loss:    1.1867: 100%|██████████| 1/1 [00:07<00:00,  7.73s/it]
Epoch 1 of 1: 100%|██████████| 1/1 [00:10<00:00, 10.81s/it]
INFO:simpletransformers.classification.classification_model: Training of bert model complete. Saved to outputs/.


(1, 1.1867040395736694)

## Eval Model

In [17]:
# Evaluate on the evaluation frame and unpack the three values returned.
# NOTE(review): in the recorded run this call ends with
# "ValueError: Target is multiclass but average='binary'", so none of the three
# names is assigned. It looks like a default metric assumes two classes while this
# model has three — TODO confirm against the installed library version.
result, model_outputs, wrong_predictions = model.eval_model(eval_df)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
0it [00:00, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
1it [00:05,  5.12s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_bert_128_3_2
Running Evaluation: 100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

## Predictions

In [21]:
# Classify one unseen sentence; `predict` takes a list of texts and returns the
# predicted labels together with the raw model outputs.
predictions, raw_outputs = model.predict(["Sam was a Wizard"])
# Show the label predicted for the only input sentence.
print("Label predicted: ", predictions[0])

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
0it [00:00, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
1it [00:05,  5.09s/it]
100%|██████████| 1/1 [00:00<00:00,  1.24it/s]

Label predicted:  1





# Using ITALIAN LEGAL BERT

### Imports

In [22]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import pandas as pd
import logging

### Logging

In [23]:
# Keep the `transformers` logger at WARNING and above; everything else is logged
# from INFO upwards through the root logger configuration.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)

## Preparing Data

### Training

In [25]:
# Training examples for the citation detector.
# Label 1 = the text is a legal citation, label 0 = it is not.
train_data = [
    ["sentenza n. 1902/10 del Tribunale di Pisa", 1],
    ["cfr.cass.civ.sez.III 15.7.05 n.15019", 1],
    [" ex art.32 cost", 1],
    ["Un'erdita' s'intende devoluta, quando alcuno la puo' conseguire", 0],
    ["Ha un'apparenza, ma tutt'altra sostanza", 0],
    ["Il creditore no tenne conto delle cose nella loro individualita', ma bensì della quantita'", 0],
]

# One row per example, with the sentence in `text` and its class in `labels`.
train_df = pd.DataFrame(train_data, columns=["text", "labels"])

### Validation

In [26]:
# Validation examples, passed to training as `eval_df`.
# Label 1 = the text is a legal citation, label 0 = it is not.
val_data = [
    ["Suprema Corte, con sentenza del 15.5.2012, n. 7531", 1],
    ["Sentenza 11 febbraio 2015 n. 11", 1],
    ["Un diritto non ancora esercitabile non e' soggetto a prescrizione", 0],
    ["Chiunque puo' stipulare a favore d'un terzo", 0],
]
# One row per example, with the sentence in `text` and its class in `labels`.
val_df = pd.DataFrame(val_data, columns=["text", "labels"])

### Evaluation

In [27]:
# Evaluation examples, used for the final evaluation of the trained model.
# Label 1 = the text is a legal citation, label 0 = it is not.
eval_data = [
    ["Sez. IV n. 20231/2012 Rv. 252683", 1],
    ["ex artt.2, 29, 30 e 31 cost", 1],
    ["Contro il fatto, ogni contraria dichiarazione e' vana", 0],
    ["Il consenso, e non gia' l'accoppiamento, costituisce il matrimonio", 0],
]
# One row per example, with the sentence in `text` and its class in `labels`.
eval_df = pd.DataFrame(eval_data, columns=["text", "labels"])

## Model

In [29]:
# Simple model configuration (more advanced options:
# https://simpletransformers.ai/docs/usage/#configuring-a-simple-transformers-model)
# - four training epochs, with evaluation on the validation frame during training;
# - `overwrite_output_dir=True` allows training to write into an output directory
#   left over from an earlier run, so a re-run does not depend on deleting that
#   directory with a shell command first.
model_args = ClassificationArgs(
    num_train_epochs=4,
    evaluate_during_training=True,
    overwrite_output_dir=True,
)

In [31]:
# Alternative, not used here: the Italian-Legal-BERT-SC checkpoint loaded as a
# CamemBERT model on the GPU.
# model = ClassificationModel(
#     "camembert", "dlicari/Italian-Legal-BERT-SC", args=model_args, use_cuda=True
# )

# Classifier actually used: BERT architecture with the `dlicari/Italian-Legal-BERT`
# checkpoint, configured by `model_args` and kept on the CPU.
model = ClassificationModel(
    "bert",
    "dlicari/Italian-Legal-BERT",
    use_cuda=False,
    args=model_args,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dlicari/Italian-Legal-BERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Training

In [32]:
!rm -r outputs
# Train model
_, training_details = model.train_model(train_df, eval_df=val_df)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
0it [00:00, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
1it [00:05,  5.95s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_bert_128_2_2
Epochs 1/4. Running Loss:    0.6462: 100%|██████████| 1/1 [00:12<00:00, 12.75s/it]
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment var

## Training results

In [33]:
# Show the training details as a table. In the recorded output there is one row
# per global step, with the training loss and the evaluation metrics as columns.
pd.DataFrame(training_details)

Unnamed: 0,global_step,train_loss,mcc,accuracy,f1_score,tp,tn,fp,fn,auroc,auprc,eval_loss
0,1,0.64615,0.0,0.5,0.666667,2,0,2,0,1.0,1.0,0.592601
1,2,0.619703,0.57735,0.75,0.8,2,1,1,0,1.0,1.0,0.52066
2,3,0.547909,0.57735,0.75,0.8,2,1,1,0,1.0,1.0,0.476251
3,4,0.485335,1.0,1.0,1.0,2,2,0,0,1.0,1.0,0.454528


## Evaluation

In [34]:
# Perform model evaluation on the evaluation set; only the first returned value
# (the metrics) is kept, the other two are discarded.
scores, _, _ = model.eval_model(eval_df)
# Display the metrics as a named Series, one metric per row.
pd.Series(scores, name='Eval Scores')

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
0it [00:00, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
1it [00:03,  3.99s/it]
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_bert_128_2_2
Running Evaluation: 100%|██████████| 1/1 [00:02<00:00,  2.27s/it]
INFO:simpletransformers.classification.classification_model:{'mcc': 1.0, 'accuracy': 1.0, 'f1_score': 1.0, 'tp': 2, 'tn': 2, 'fp': 0, 'fn': 0, 'auroc': 1.0, 'auprc': 1.0, 'eval_loss': 0.4581565260887146}


mcc          1.000000
accuracy     1.000000
f1_score     1.000000
tp           2.000000
tn           2.000000
fp           0.000000
fn           0.000000
auroc        1.000000
auprc        1.000000
eval_loss    0.458157
Name: Eval Scores, dtype: float64

## Predictions

In [36]:
# Classify one unseen sentence. Note that it is an English sentence, whereas the
# training data for this model is Italian legal text.
predictions, raw_outputs = model.predict(["Sam was a Wizard"])
# Predicted label for the only input (1 = citation, 0 = no_citation, as in the
# data cells of this notebook).
predictions[0]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
0it [00:00, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
1it [00:04,  4.84s/it]
100%|██████████| 1/1 [00:00<00:00,  1.32it/s]


0

In [37]:
# Raw model outputs returned by `predict` for the sentence above (a 1x2 array in
# the recorded output); presumably one un-normalised score per label — TODO confirm.
raw_outputs

array([[ 0.20004646, -0.04863947]])