## Classification of the three tasks with the Augmented Datasets.

In [1]:
import random

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.svm import LinearSVC, SVC
import warnings
import math
warnings.filterwarnings("ignore")


def get_classifiers(names, multilabel=False):
    """Build the estimators selected by ``names``.

    When ``names`` contains ``'random'`` or ``'majority'`` it is handed back
    untouched: those entries are baseline markers, not estimators, and the
    callers in this file branch on them directly.

    Otherwise a list is returned holding one freshly constructed estimator
    for each recognised key present in ``names`` (``'linearsvc'``,
    ``'randomforest'``, ``'gaussiannb'``, ``'kneighbors'``, ``'svc'``).  The
    result always follows that fixed order, whatever the order in ``names``;
    unrecognised keys are ignored.  With ``multilabel=True`` every estimator
    is wrapped in a ``OneVsRestClassifier``.
    """
    if 'random' in names or 'majority' in names:
        return names

    # Fixed (key, estimator) table; its order defines the order of the result.
    candidates = [
        ('linearsvc', LinearSVC()),
        ('randomforest', RandomForestClassifier()),
        ('gaussiannb', GaussianNB()),
        ('kneighbors', KNeighborsClassifier()),
        ('svc', SVC(kernel='poly')),
    ]
    if multilabel:
        candidates = [(key, OneVsRestClassifier(estimator)) for key, estimator in candidates]

    return [estimator for key, estimator in candidates if key in names]


def get_embeddings(corpus, embedding):  # embedding in [tfidf, sbert, legalbert]
    """Turn every text of ``corpus`` into a feature vector.

    Parameters
    ----------
    corpus : sequence of str
        The texts to encode.
    embedding : str
        ``'tfidf'`` fits a ``TfidfVectorizer`` on ``corpus`` and returns the
        result as a dense array; ``'sbert'`` and ``'legalbert'`` load the
        ``'bert-base-nli-mean-tokens'`` and
        ``'nlpaueb/legal-bert-small-uncased'`` models through
        ``SentenceTransformer`` and return ``model.encode(corpus)``.

    Returns
    -------
    The embeddings, one entry per text in ``corpus``.

    Raises
    ------
    ValueError
        If ``embedding`` is not one of the three supported names.  (This used
        to print a message and return ``None``, which only surfaced later as
        an unrelated indexing error in the callers.)
    """
    if embedding == "sbert":
        model = SentenceTransformer('bert-base-nli-mean-tokens')
        return model.encode(corpus)
    if embedding == "legalbert":
        model = SentenceTransformer("nlpaueb/legal-bert-small-uncased")
        return model.encode(corpus)
    if embedding == "tfidf":
        # NOTE(review): the vocabulary/IDF weights are fitted on the whole
        # corpus passed in, including whatever rows the caller later uses as
        # a test fold - confirm that this is intended.
        vectorizer = TfidfVectorizer()
        return vectorizer.fit_transform(corpus).toarray()

    raise ValueError(
        f"wrong embedding name: {embedding!r} "
        "(expected 'tfidf', 'sbert' or 'legalbert')"
    )


def argument_classification(df, classifiers, embeddings):
    """Cross-validate single-label classifiers on the ``Name`` column.

    For every embedding in ``embeddings`` and every classifier produced by
    ``get_classifiers(classifiers)``, folds 1 to 5 of the ``Split`` column are
    used in turn as the test set while all remaining rows form the training
    set.  The predictions of the five folds are pooled and a single
    ``classification_report`` is printed per (embedding, classifier) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Text`` (input), ``Name`` (target label)
        and ``Split`` (fold id).
    classifiers : list of str
        Keys understood by ``get_classifiers``; ``'random'`` and
        ``'majority'`` select the two baselines implemented here.
    embeddings : list of str
        Names understood by ``get_embeddings``.

    Returns
    -------
    None.  The reports are printed.
    """
    corpus = df['Text'].values
    fold_ids = df['Split'].values
    targets = df['Name'].values
    random.seed(42)
    for embedding in embeddings:
        X = get_embeddings(corpus, embedding)

        for classifier in get_classifiers(classifiers):
            y_pred_all = []
            y_test_all = []
            for fold in range(1, 6):
                test_mask = fold_ids == fold
                X_train, X_test = X[~test_mask], X[test_mask]
                y_train, y_test = targets[~test_mask], targets[test_mask]

                if classifier == 'random':
                    # Sorted, so that the seeded draws are reproducible: the
                    # iteration order of a set of strings changes between
                    # interpreter runs.
                    labs = sorted(set(y_train))
                    y_pred = [random.choice(labs) for _ in range(len(X_test))]
                elif classifier == 'majority':
                    # Most frequent training label; ties go to the label that
                    # sorts first, so the choice is deterministic.
                    train_labels = list(y_train)
                    majority_class = max(sorted(set(train_labels)), key=train_labels.count)
                    y_pred = [majority_class for _ in range(len(X_test))]
                else:
                    classifier.fit(X_train, y_train)
                    y_pred = classifier.predict(X_test)

                y_pred_all.extend(y_pred)
                y_test_all.extend(y_test)

            # Take the labels from everything that was actually evaluated, not
            # from the training part of the last fold only, so the names always
            # line up with the rows of the report.
            labels = sorted(set(y_test_all) | set(y_pred_all))

            report = classification_report(
                y_test_all,
                y_pred_all,
                labels=labels,
                target_names=[str(label) for label in labels],
            )
            # The baselines are plain strings; estimators are named by class.
            classifier_name = classifier if isinstance(classifier, str) else type(classifier).__name__
            print("EMBEDDING:" + embedding + "      CLASSIFIER:" + classifier_name)
            print(report)
            print("="*40)


def attribute_classification(df, classifiers, embeddings, attribute):
    """Cross-validate multi-label classifiers on the ``attribute`` column.

    Rows whose ``attribute`` is missing or an empty list are discarded; the
    remaining values are normalised to sorted lists of unique labels (a
    non-list value becomes a one-element list).  The label distribution and
    the number of usable rows are printed.

    For every embedding in ``embeddings`` and every classifier produced by
    ``get_classifiers(classifiers, multilabel=True)``, folds 1 to 5 of the
    ``Split`` column are used in turn as the test set while all remaining rows
    form the training set.  The predictions of the five folds are pooled and
    a single ``classification_report`` is printed per pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Text``, ``Split`` and ``attribute``.  The
        frame passed in is not modified.
    classifiers : list of str
        Keys understood by ``get_classifiers``; ``'random'`` and
        ``'majority'`` select the two baselines implemented here.
    embeddings : list of str
        Names understood by ``get_embeddings``.
    attribute : str
        Name of the column holding the label(s) of each row.

    Returns
    -------
    None.  The reports are printed.
    """
    # Copy first so the column assignments below never touch the caller's frame.
    df = df.dropna(subset=[attribute]).copy()

    df[attribute] = df[attribute].apply(lambda x: [x] if not isinstance(x, list) else x)

    # ``np.nan`` rather than ``np.NaN``: the latter alias was removed in NumPy 2.0.
    df[attribute] = df[attribute].apply(lambda x: sorted(set(x)) if x != [] else np.nan)

    df = df.dropna(subset=[attribute])
    print(df[attribute].value_counts(),len(df))

    corpus = df['Text'].values
    fold_ids = df['Split'].values

    # Fit the binarizer once on all usable rows.  Re-fitting it per fold (and
    # again on the test part) can yield a different column set or order for
    # train, test and the individual folds, which silently misaligns labels
    # or breaks the concatenation of the per-fold results.
    binarizer = MultiLabelBinarizer()
    Y = binarizer.fit_transform(df[attribute])
    labels = list(binarizer.classes_)

    # Same seed as argument_classification so the random baseline is repeatable.
    random.seed(42)

    for embedding in embeddings:
        X = get_embeddings(corpus, embedding)

        for classifier in get_classifiers(classifiers, multilabel=True):
            y_pred_folds = []
            y_test_folds = []
            for fold in range(1, 6):
                test_mask = fold_ids == fold
                X_train, X_test = X[~test_mask], X[test_mask]
                y_train, y_test = Y[~test_mask], Y[test_mask]

                if classifier == 'random':
                    # random.sample needs a sequence; passing a set raises
                    # TypeError on Python 3.11+.
                    sampled = [random.sample(labels, random.randint(1, len(labels))) for _ in range(len(X_test))]
                    y_pred = binarizer.transform(sampled)
                elif classifier == 'majority':
                    # Predict a label iff more than half of the training rows
                    # carry it; the columns follow the binarizer's own order.
                    threshold = len(y_train) / 2
                    majority_row = [1 if count > threshold else 0 for count in y_train.sum(axis=0)]
                    y_pred = [majority_row for _ in range(len(X_test))]
                else:
                    classifier.fit(X_train, y_train)
                    y_pred = classifier.predict(X_test)

                y_pred_folds.append(y_pred)
                y_test_folds.append(y_test)

            y_pred_all = np.concatenate(y_pred_folds)
            y_test_all = np.concatenate(y_test_folds)

            report = classification_report(
                y_test_all,
                y_pred_all,
                target_names=[str(label) for label in labels],
            )
            # The baselines are plain strings; real classifiers are named after
            # the estimator wrapped by OneVsRestClassifier.
            if isinstance(classifier, str):
                classifier_name = classifier
            else:
                classifier_name = type(getattr(classifier, 'estimator', classifier)).__name__
            print("EMBEDDING:" + embedding + "      CLASSIFIER:" + classifier_name)
            print(report)
            print("="*40)

def training_dynamics(prob_y):
    """Return ``(confidence, variability)`` for a sequence of numbers.

    ``confidence`` is the arithmetic mean of ``prob_y``; ``variability`` is
    the population standard deviation around that mean (the sum of squared
    deviations is divided by ``len(prob_y)``).  An empty ``prob_y`` raises
    ``ZeroDivisionError``.
    """
    n_values = len(prob_y)
    confidence = sum(prob_y) / n_values
    squared_deviations = [(value - confidence) ** 2 for value in prob_y]
    variability = math.sqrt(sum(squared_deviations) / n_values)
    return confidence, variability

  from .autonotebook import tqdm as notebook_tqdm


## TC (Type Classification)

In [2]:
# Read the sentence-level data used by the attribute classification runs.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling -
# only load pickle files that come from a trusted source.
df_sentences = pd.read_pickle("./updated_tc_df.pkl")
# Disabled: second pickle that is currently not loaded.
#df_annotations = pd.read_pickle("./updatedsent_df.pkl")

# Classifier keys (see get_classifiers) and embedding names (see
# get_embeddings) to evaluate; every combination of the two is run.
classifiers = ['linearsvc', 'randomforest', 'gaussiannb', 'kneighbors', 'svc']
# Alternative: use the line below instead to run only the two baselines.
# classifiers = ['random', 'majority']
embeddings = ['tfidf', 'sbert', 'legalbert']

# Argument detection - disabled in this cell (refers to an `argumentmining`
# module that is not imported in this notebook).
#argumentmining.argument_classification(df_sentences, classifiers, embeddings)

# Type classification: multi-label cross-validation on the 'Type' column.
attribute_classification(df_sentences, classifiers, embeddings, 'Type')


Type
[L]       1598
[F]       1469
[F, L]     107
Name: count, dtype: int64 3174
EMBEDDING:tfidf      CLASSIFIER:LinearSVC
              precision    recall  f1-score   support

           F       0.92      0.85      0.88      1576
           L       0.87      0.92      0.89      1705

   micro avg       0.89      0.88      0.89      3281
   macro avg       0.89      0.88      0.89      3281
weighted avg       0.89      0.88      0.89      3281
 samples avg       0.89      0.90      0.89      3281

EMBEDDING:tfidf      CLASSIFIER:RandomForestClassifier
              precision    recall  f1-score   support

           F       0.90      0.91      0.90      1576
           L       0.91      0.89      0.90      1705

   micro avg       0.91      0.90      0.90      3281
   macro avg       0.91      0.90      0.90      3281
weighted avg       0.91      0.90      0.90      3281
 samples avg       0.91      0.91      0.91      3281

EMBEDDING:tfidf      CLASSIFIER:GaussianNB
              pre

No sentence-transformers model found with name C:\Users\utente/.cache\torch\sentence_transformers\nlpaueb_legal-bert-small-uncased. Creating a new one with MEAN pooling.
Some weights of the model checkpoint at C:\Users\utente/.cache\torch\sentence_transformers\nlpaueb_legal-bert-small-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be ex

EMBEDDING:legalbert      CLASSIFIER:LinearSVC
              precision    recall  f1-score   support

           F       0.87      0.85      0.86      1576
           L       0.84      0.87      0.86      1705

   micro avg       0.86      0.86      0.86      3281
   macro avg       0.86      0.86      0.86      3281
weighted avg       0.86      0.86      0.86      3281
 samples avg       0.85      0.87      0.85      3281

EMBEDDING:legalbert      CLASSIFIER:RandomForestClassifier
              precision    recall  f1-score   support

           F       0.84      0.86      0.85      1576
           L       0.86      0.86      0.86      1705

   micro avg       0.85      0.86      0.86      3281
   macro avg       0.85      0.86      0.86      3281
weighted avg       0.85      0.86      0.86      3281
 samples avg       0.85      0.87      0.85      3281

EMBEDDING:legalbert      CLASSIFIER:GaussianNB
              precision    recall  f1-score   support

           F       0.75      0.

## SC (Scheme Classification)

In [3]:
# Scheme classification: same data frame, classifier keys and embedding names
# as defined in the earlier cell, this time targeting the 'Scheme' column.
attribute_classification(df_sentences, classifiers, embeddings, 'Scheme')

Scheme
[Prec]                 576
[Itpr]                 298
[Prec, Rule]           225
[Rule]                 162
[Itpr, Rule]           127
[Class]                 53
[Itpr, Prec]            50
[Itpr, Prec, Rule]      43
[Aut]                   37
[Class, Prec]           28
[Class, Prec, Rule]     15
[Aut, Itpr, Rule]       12
[Prec, Princ]           10
[Aut, Itpr]             10
[Class, Rule]            9
[Princ, Rule]            7
[Princ]                  5
[Aut, Prec]              4
[Itpr, Princ]            2
[Aut, Class, Rule]       2
[Aut, Rule]              2
[Class, Itpr]            2
[Aut, Prec, Princ]       2
[Aut, Itpr, Prec]        2
[Aut, Prec, Rule]        1
Name: count, dtype: int64 1684
EMBEDDING:tfidf      CLASSIFIER:LinearSVC
              precision    recall  f1-score   support

         Aut       1.00      0.83      0.91        72
       Class       0.98      0.78      0.87       109
        Itpr       0.84      0.77      0.81       546
        Prec       0.95     

No sentence-transformers model found with name C:\Users\utente/.cache\torch\sentence_transformers\nlpaueb_legal-bert-small-uncased. Creating a new one with MEAN pooling.
Some weights of the model checkpoint at C:\Users\utente/.cache\torch\sentence_transformers\nlpaueb_legal-bert-small-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be ex

EMBEDDING:legalbert      CLASSIFIER:LinearSVC
              precision    recall  f1-score   support

         Aut       0.94      0.85      0.89        72
       Class       0.80      0.76      0.78       109
        Itpr       0.67      0.62      0.64       546
        Prec       0.91      0.92      0.91       956
       Princ       0.65      0.58      0.61        26
        Rule       0.86      0.82      0.84       605

   micro avg       0.83      0.81      0.82      2314
   macro avg       0.80      0.76      0.78      2314
weighted avg       0.83      0.81      0.82      2314
 samples avg       0.81      0.82      0.80      2314

EMBEDDING:legalbert      CLASSIFIER:RandomForestClassifier
              precision    recall  f1-score   support

         Aut       0.00      0.00      0.00        72
       Class       0.95      0.37      0.53       109
        Itpr       0.79      0.57      0.66       546
        Prec       0.95      0.85      0.89       956
       Princ       0.67    

In [4]:
# Count the rows whose 'Type' is missing (attribute_classification drops
# those rows), and the rows whose 'Type' equals the plain string 'L' or 'F'.
# NOTE(review): count_L and count_F are not used within this cell - confirm
# whether a later cell reads them.
count_nan = df_sentences['Type'].isna().sum()
count_L = df_sentences[df_sentences['Type'] == 'L'].shape[0]
count_F = df_sentences[df_sentences['Type'] == 'F'].shape[0]
print(f'There are {len(df_sentences)} sentences, but {count_nan} cannot be used.')

There are 3334 sentences, but 160 cannot be used.


## AC (Argument Classification)

In [5]:
# Read the data for the argument classification run.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling -
# only load pickle files that come from a trusted source.
df_sentences_ac = pd.read_pickle("./updated_df_ac.pkl")

# Classifier keys (see get_classifiers) and embedding names (see
# get_embeddings) to evaluate; every combination of the two is run.
classifiers = ['linearsvc', 'randomforest', 'gaussiannb', 'kneighbors', 'svc']
# Alternative: use the line below instead to run only the two baselines.
# classifiers = ['random', 'majority']
embeddings = ['tfidf', 'sbert', 'legalbert']

# Argument detection: single-label cross-validation on the 'Name' column.
argument_classification(df_sentences_ac, classifiers, embeddings)

EMBEDDING:tfidf      CLASSIFIER:LinearSVC
              precision    recall  f1-score   support

        conc       0.94      0.92      0.93       480
        prem       0.99      0.99      0.99      3174

    accuracy                           0.98      3654
   macro avg       0.96      0.96      0.96      3654
weighted avg       0.98      0.98      0.98      3654

EMBEDDING:tfidf      CLASSIFIER:RandomForestClassifier
              precision    recall  f1-score   support

        conc       0.93      0.95      0.94       480
        prem       0.99      0.99      0.99      3174

    accuracy                           0.98      3654
   macro avg       0.96      0.97      0.96      3654
weighted avg       0.98      0.98      0.98      3654

EMBEDDING:tfidf      CLASSIFIER:GaussianNB
              precision    recall  f1-score   support

        conc       0.91      0.92      0.91       480
        prem       0.99      0.99      0.99      3174

    accuracy                           0.9

No sentence-transformers model found with name C:\Users\utente/.cache\torch\sentence_transformers\nlpaueb_legal-bert-small-uncased. Creating a new one with MEAN pooling.
Some weights of the model checkpoint at C:\Users\utente/.cache\torch\sentence_transformers\nlpaueb_legal-bert-small-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be ex

EMBEDDING:legalbert      CLASSIFIER:LinearSVC
              precision    recall  f1-score   support

        conc       0.87      0.88      0.88       480
        prem       0.98      0.98      0.98      3174

    accuracy                           0.97      3654
   macro avg       0.93      0.93      0.93      3654
weighted avg       0.97      0.97      0.97      3654

EMBEDDING:legalbert      CLASSIFIER:RandomForestClassifier
              precision    recall  f1-score   support

        conc       0.93      0.89      0.91       480
        prem       0.98      0.99      0.99      3174

    accuracy                           0.98      3654
   macro avg       0.96      0.94      0.95      3654
weighted avg       0.98      0.98      0.98      3654

EMBEDDING:legalbert      CLASSIFIER:GaussianNB
              precision    recall  f1-score   support

        conc       0.77      0.88      0.82       480
        prem       0.98      0.96      0.97      3174

    accuracy                  