### Imports

In [16]:
# list of necessary imports
from google.colab import drive
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer
import torch
from transformers import RobertaForSequenceClassification
from transformers import AutoTokenizer, AutoModel, TFAutoModel
from transformers import Trainer, TrainingArguments
from torch.nn.functional import softmax
from sklearn.metrics import confusion_matrix
from transformers import AutoModelForSequenceClassification

### Dataset

In [None]:
# Mount Google Drive, where the dataset is stored. When working from a local
# file instead, skip the mount and point `dataset_path` at that file.
drive.mount('/content/drive')

# The dataset is an Excel workbook; use pd.read_csv if it was downloaded as CSV.
dataset_path = '/content/drive/MyDrive/DSP/punishmentextraction.xlsx'
df = pd.read_excel(dataset_path)

In [3]:
# Pick the text column and the target column for the verdict task at hand.
# "Guilty Binary" (guilty vs. not guilty) is used as the example target; select a
# different column of `df` to predict a different verdict.
court_description = df["Court Description"].tolist()
labels = df["Guilty Binary"].tolist()

# Fixed seed so the stratified split (20% test) is the same on every run;
# without it each Restart & Run All trains and evaluates on different cases.
SPLIT_SEED = 42

train_texts, test_texts, train_labels, test_labels = train_test_split(
    court_description, labels, test_size=0.2, stratify=labels, random_state=SPLIT_SEED)

### Preparing the data (Chunking)

In [None]:
# Select the tokenizer. It must come from the same checkpoint as the model loaded
# in the "Model" section. One checkpoint is active by default so this cell defines
# `tokenizer` when run as-is; to use the other one, comment out the first
# assignment and uncomment the second.

tokenizer = AutoTokenizer.from_pretrained("GroNLP/bert-base-dutch-cased")
# tokenizer = RobertaTokenizer.from_pretrained("pdelobelle/robbert-v2-dutch-base")

In [5]:
def chunk_text(text, tokenizer, max_length=512):
    '''
    Split a court case into consecutive chunks that each fit one model input.

    Args:
      text - court case (string)
      tokenizer - tokenizer providing `tokenize` and `convert_tokens_to_string`
      max_length - maximum length, in tokens, of one encoded chunk, counting
        the special tokens the tokenizer adds when the chunk is encoded

    Returns:
      list of chunk strings in document order; empty if the text yields no tokens

    Raises:
      ValueError - if max_length leaves no room for any content token
    '''
    # Encoding a chunk later adds special tokens on top of its content tokens.
    # Reserve room for them here; otherwise a full chunk exceeds `max_length`
    # once encoded and truncation drops its last tokens. Tokenizers without
    # `num_special_tokens_to_add` are treated as adding none.
    count_special = getattr(tokenizer, "num_special_tokens_to_add", None)
    reserved = count_special() if callable(count_special) else 0
    window = max_length - reserved
    if window <= 0:
        raise ValueError(
            f"max_length={max_length} leaves no room for content tokens "
            f"({reserved} reserved for special tokens)"
        )

    tokens = tokenizer.tokenize(text)
    chunks = [tokens[i:i + window] for i in range(0, len(tokens), window)]
    return [tokenizer.convert_tokens_to_string(chunk) for chunk in chunks]

In [None]:
def _chunk_split(texts, labels):
    '''
    Chunk every court case of one split and repeat its label for each chunk.

    Args:
      texts - court cases of the split
      labels - one label per court case, aligned with texts

    Returns:
      (chunk strings, one label per chunk, number of chunks per court case)
    '''
    flat_chunks, flat_labels, chunk_counts = [], [], []
    for case_text, case_label in zip(texts, labels):
        pieces = chunk_text(case_text, tokenizer)
        flat_chunks += pieces
        flat_labels += [case_label] * len(pieces)
        # Remembered so chunk-level predictions can be regrouped per case later.
        chunk_counts.append(len(pieces))
    return flat_chunks, flat_labels, chunk_counts


chunked_train_texts, chunked_train_labels, train_chunks_per_text = _chunk_split(
    train_texts, train_labels)

chunked_test_texts, chunked_test_labels, test_chunks_per_text = _chunk_split(
    test_texts, test_labels)

In [7]:
# Encode the chunk strings of both splits with identical settings: padding and
# truncation enabled, with a maximum length of 512.
encoding_options = dict(truncation=True, padding=True, max_length=512)
train_encodings = tokenizer(chunked_train_texts, **encoding_options)
test_encodings = tokenizer(chunked_test_texts, **encoding_options)

In [8]:
class Dataset(torch.utils.data.Dataset):
    '''
    Torch dataset pairing tokenizer encodings with one label per example.

    Args:
      encodings - mapping from field name to a per-example indexable sequence
      labels - sequence of labels, one per example
    '''

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of examples is defined by the labels.
        return len(self.labels)

    def __getitem__(self, idx):
        # One tensor per encoding field for example `idx`, plus its label
        # under the "labels" key.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample["labels"] = torch.tensor(self.labels[idx])
        return sample

In [9]:
# Wrap the encoded chunks and their per-chunk labels for the Trainer.
train_dataset = Dataset(encodings=train_encodings, labels=chunked_train_labels)
test_dataset = Dataset(encodings=test_encodings, labels=chunked_test_labels)

### Model

In [None]:
# Load the sequence-classification model with two labels. It must come from the
# same checkpoint as the tokenizer. One checkpoint is active by default so this
# cell defines `model` when run as-is; to test the other one, comment out the
# first assignment and uncomment the second.

model = AutoModelForSequenceClassification.from_pretrained("GroNLP/bert-base-dutch-cased", num_labels=2)
# model = RobertaForSequenceClassification.from_pretrained("pdelobelle/robbert-v2-dutch-base", num_labels=2)

In [None]:
# Hyperparameters

training_args = TrainingArguments(
    # Optimisation
    learning_rate=1e-5,
    weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Evaluate, checkpoint and log once per epoch
    # NOTE(review): newer transformers releases rename `evaluation_strategy` to
    # `eval_strategy` - confirm against the installed version.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    log_level="info",
    # Output locations
    output_dir="./results",
    logging_dir="./logs",
)

# The Trainer evaluates on the test chunks at the end of each epoch.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

### Train

In [None]:
# Fine-tune the model. Weights & Biases logging may be enabled automatically and
# ask for an API key; provide your own.
trainer.train()

### Evaluate

In [None]:
# Chunk-level evaluation on the dataset given to the Trainer as `eval_dataset`.
trainer.evaluate()

In [None]:
# Run the model on every test chunk and keep the softmax probability of class 1.
predictions_output = trainer.predict(test_dataset)
logits = predictions_output.predictions
probabilities = softmax(torch.tensor(logits), dim=1)
probs = probabilities[:, 1].tolist()

# Reassemble the court cases from their chunks: `probs` is flat and in chunk
# order, so cumulative chunk counts give the slice belonging to each case.
boundaries = [0]
for num_chunks in test_chunks_per_text:
    boundaries.append(boundaries[-1] + num_chunks)

chunk_predictions = [
    probs[first:last] for first, last in zip(boundaries, boundaries[1:])
]

In [22]:
def aggregate_predictions(predictions, strategy):
    '''
    Combine the chunk-level predictions of one court case into a single score.

    Args:
      predictions - non-empty list of predictions for the chunks of one case
      strategy - "max", "mean" or "min"

    Returns:
      the largest, average or smallest prediction, depending on strategy

    Raises:
      ValueError - if predictions is empty or strategy is not recognised
    '''
    predictions = list(predictions)
    if not predictions:
        # A case with no chunks has nothing to aggregate; fail with a clear
        # message instead of an opaque error from max()/min() or a division by zero.
        raise ValueError("cannot aggregate an empty list of predictions")

    if strategy == "max":
        return max(predictions)
    if strategy == "mean":
        return sum(predictions) / len(predictions)
    if strategy == "min":
        return min(predictions)

    # Previously an unknown strategy fell through and returned None.
    raise ValueError(
        f"unknown strategy {strategy!r}; expected 'max', 'mean' or 'min'"
    )

In [23]:
# Collapse the chunk probabilities of each court case into one score using the
# "mean" strategy.
aggregated_predictions = []
for preds in chunk_predictions:
    aggregated_predictions.append(aggregate_predictions(preds, "mean"))

# A case is labelled 1 when its aggregated probability is at least 0.5, else 0.
binary_predictions = [int(prob >= 0.5) for prob in aggregated_predictions]

In [None]:
# Confusion matrix of the true labels against the case-level predictions;
# precision, recall and accuracy are calculated from it.
cm = confusion_matrix(y_true=test_labels, y_pred=binary_predictions)
cm