In [1]:
import jsonlines
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import f1_score, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

### Baseline - 2 : Classical ML Models

For the classical Ml Models baseline, we have implemented and tested the dataset on various models which includes logistic regression, support vector machines, k-nearest neighbors, decision trees, and random forests. Taking inspiration from the ACL 2020 Diplomacy paper which used Logistic Regression, we extended the approach by using other ML models as well. We have used the same evaluation metrics : Accuracy, Macro F1 and Lie F1 score as given the paper so that we can have a proper comparison. 

In [2]:
class Preprocessing_class:
    def __init__(self, file_path, label_type="sender"):
        self.file_path = file_path
        self.label_type = label_type.lower() 
        self.data = None
        self.aggregated_messages = None

    def load_data(self):
        with jsonlines.open(self.file_path, 'r') as reader:
            self.data = list(reader)
        return self.data
    
    def process_dialog(self, dialog):
        messages = dialog.get('messages', [])
        senders = dialog.get('sender_labels', [])
        receivers = dialog.get('receiver_labels', [])
        return [{'message': msg, 'sender': senders[i], 'receiver': receivers[i]}  for i, msg in enumerate(messages)]
    
    def aggregate_dialogs(self):
        if self.data is None:self.load_data()
        return [msg for dialog in self.data for msg in self.process_dialog(dialog)]
    
    def aggregate_data(self):
        self.aggregated_messages = self.aggregate_dialogs()
        return self.aggregated_messages
    
    def to_bool(self, label):
        return label if isinstance(label, bool) else (True if label.lower() == 'true' else False)
    
    def is_valid_label(self, label):
        return label in {True, False, 'true', 'false'}
    
    def filter_message(self, msg):
        if self.is_valid_label(msg.get('receiver')):
            return { 'message': msg['message'],'sender': self.to_bool(msg['sender']), 'receiver': self.to_bool(msg['receiver'])}
        return None

    def filter_valid_messages(self):
        if self.aggregated_messages is None: self.aggregate_data()
        return [filtered for msg in self.aggregated_messages 
                if (filtered := self.filter_message(msg)) is not None]
    
    def get_text_and_labels(self):
        valid_msgs = self.filter_valid_messages()
        texts = [msg['message'] for msg in valid_msgs]
        get_label = lambda msg: msg['sender'] if self.label_type == "sender" else msg['receiver']
        labels = [0 if get_label(msg) else 1 for msg in valid_msgs]
        return texts, labels

    def vectorize_texts(self, texts, vectorizer=None):
        if vectorizer is None:
            vectorizer = CountVectorizer(stop_words='english')
            X = vectorizer.fit_transform(texts)
        else: X = vectorizer.transform(texts)
        return X, vectorizer

In [3]:
class Ml_Model_class:
    def __init__(self, X_train, y_train, X_val, y_val, X_test, y_test):
        self.X_train, self.y_train = X_train, y_train
        self.X_val, self.y_val = X_val, y_val
        self.X_test, self.y_test = X_test, y_test

    def fit_predict(self, model):
        model.fit(self.X_train, self.y_train)
        return model.predict(self.X_val), model.predict(self.X_test)

    def compute_metrics(self, preds, y):
        macro_f1 = lambda p, y_true: f1_score(y_true, p, average='macro', zero_division=0)
        lie_f1   = lambda p, y_true: f1_score(y_true, p, pos_label=1, average='binary', zero_division=0)
        acc      = lambda p, y_true: accuracy_score(y_true, p)
        return { "Macro F1": macro_f1(preds, y), "Lie F1": lie_f1(preds, y), "Accuracy": acc(preds, y)}

    def print_metrics(self, metrics, set_name):
        formatted = {k: round(v, 3) for k, v in metrics.items()}
        print(f"{set_name}:")
        for key, value in formatted.items(): print(f"  {key:<12}: {value}")

    def evaluate_model(self, model, model_name):
        preds_val, preds_test = self.fit_predict(model)
        metrics_test = self.compute_metrics(preds_test, self.y_test)
        
        print(model_name)
        self.print_metrics(metrics_test, "Test")
    
    def logistic_regression(self):
        model = LogisticRegression(class_weight='balanced', max_iter=1000)
        self.evaluate_model(model, "Logistic Regression:")

    def svm(self):
        model = SVC(class_weight='balanced')
        self.evaluate_model(model, "Support Vector Machine:")

    def knn(self):
        model = KNeighborsClassifier()
        self.evaluate_model(model, "K-Nearest Neighbors:")


In [4]:
train_file = '/Users/varun/Desktop/College/sem6/NLP/Group Project/Data/test.jsonl'
val_file = '/Users/varun/Desktop/College/sem6/NLP/Group Project/Data/validation.jsonl'
test_file = '/Users/varun/Desktop/College/sem6/NLP/Group Project/Data/test.jsonl'
    
train_set = Preprocessing_class(train_file, label_type="sender")
train_set.load_data()
train_set.aggregate_data()
train_texts, train_labels = train_set.get_text_and_labels()
X_train, vectorizer = train_set.vectorize_texts(train_texts)
    
val_set = Preprocessing_class(val_file, label_type="sender")
val_set.load_data()
val_set.aggregate_data()
val_texts, val_labels = val_set.get_text_and_labels()
X_val, _ = val_set.vectorize_texts(val_texts, vectorizer)

test_set = Preprocessing_class(test_file, label_type="sender")
test_set.load_data()
test_set.aggregate_data()
test_texts, test_labels = test_set.get_text_and_labels()
X_test, _ = test_set.vectorize_texts(test_texts, vectorizer)

baseline_2 = Ml_Model_class(X_train, train_labels, X_val, val_labels, X_test, test_labels)

In [5]:
baseline_2.logistic_regression()
baseline_2.svm()
baseline_2.knn()

Logistic Regression:
Test:
  Macro F1    : 0.862
  Lie F1      : 0.752
  Accuracy    : 0.949
Support Vector Machine:
Test:
  Macro F1    : 0.952
  Lie F1      : 0.912
  Accuracy    : 0.985
K-Nearest Neighbors:
Test:
  Macro F1    : 0.537
  Lie F1      : 0.116
  Accuracy    : 0.92
