#  Sexism Detection and Categorization in Tweets Using Machine Learning and NLP

This project aims to detect sexism in tweets and classify it into specific categories using machine learning models and NLP techniques. The task is divided into two subtasks:

* Sexism Detection – Identifying whether a tweet contains sexist content.

* Sexism Categorization – Classifying detected sexism into four categories: 'JUDGEMENTAL', 'REPORTED', 'DIRECT', and 'UNKNOWN'.

It compares:
* TF-IDF-based LSA
* RoBERTa contextual embeddings

as feature extraction methods. Three classifiers:
* Logistic Regression
* Decision Trees,
* MLP

are trained and evaluated to identify sexist content. The dataset consists of English and Spanish tweets labeled for different levels of sexism.

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [68]:
import pandas as pd
import torch
from transformers import AutoModel, AutoTokenizer
from transformers import RobertaTokenizer, RobertaModel
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score, classification_report

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, classification_report

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, classification_report

import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')
spanish_stopwords = stopwords.words('spanish')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
import sys
sys.path.append('/content/drive/MyDrive')


from readerEXIST2025 import EXISTReader

# reader_train = EXISTReader("EXIST2025_training.json")
# reader_dev = EXISTReader("EXIST2025_dev.json")
reader_train = EXISTReader("drive/MyDrive/EXIST2025_training.json")
reader_dev = EXISTReader("drive/MyDrive/EXIST2025_dev.json")

EnTrainTask1, EnDevTask1 = reader_train.get(lang="EN", subtask="1"), reader_dev.get(lang="EN", subtask="1")
EnTrainTask2, EnDevTask2 = reader_train.get(lang="EN", subtask="2"), reader_dev.get(lang="EN", subtask="2")

SpTrainTask1, SpDevTask1 = reader_train.get(lang="ES", subtask="1"), reader_dev.get(lang="ES", subtask="1")
SpTrainTask2, SpDevTask2 = reader_train.get(lang="ES", subtask="2"), reader_dev.get(lang="ES", subtask="2")


# ENGLISH

## Preprocessing

In [None]:
import re
web_re = re.compile(r"https?:\/\/[^\s]+", re.U)
user_re = re.compile(r"(@\w+\-?(?:\w+)?)", re.U)
hashtag_re = re.compile(r"(#\w+\-?(?:\w+)?)", re.U)

mapLabelToId = {"task1": {'NO': 0, 'YES': 1, "AMBIGUOUS": 2},
                "task2": {'-': 4, 'JUDGEMENTAL': 0, 'REPORTED': 1, 'DIRECT': 2, 'UNKNOWN': 3, "AMBIGUOUS": 5},
                "task3": {'OBJECTIFICATION': 0, 'STEREOTYPING-DOMINANCE': 1, 'MISOGYNY-NON-SEXUAL-VIOLENCE': 2,
                          'IDEOLOGICAL-INEQUALITY': 3, 'SEXUAL-VIOLENCE': 4, 'UNKNOWN': 5, '-': 6,
                          "AMBIGUOUS": 7}}

mapIdToLabel = {"task1": {0: 'NO', 1: 'YES', 2: "AMBIGUOUS"},
                "task2": {4: '-', 0: 'JUDGEMENTAL', 1: 'REPORTED', 2: 'DIRECT', 3: 'UNKNOWN', 4: "AMBIGUOUS"},
                "task3": {0: 'OBJECTIFICATION', 1: 'STEREOTYPING-DOMINANCE', 2: 'MISOGYNY-NON-SEXUAL-VIOLENCE',
                          3: 'IDEOLOGICAL-INEQUALITY', 4: 'SEXUAL-VIOLENCE', 5: 'UNKNOWN', 6: '-',
                          7: "AMBIGUOUS"}}


def standard_preprocession(text):
    text = web_re.sub("", text)
    text = user_re.sub("", text)
    text = hashtag_re.sub("", text)
    text = text.lower()

    return text


def no_preprocession(text):
    return text

def unpack(data, task):
    id,text, label = data
    id = [id.iloc[i] for i in range(len(id))]
    sptext = [standard_preprocession(text.iloc[i]) for i in range(len(text))]

    label = [mapLabelToId[task][label.iloc[i]] for i in range(len(label))]

    return {"id": id, "sptext": sptext, "label": label}

## Tweet representations (Feature extraction)

In [None]:
# Obtaining a representation for the train and dev subsets in both tasks
if torch.backends.mps.is_available():  # Mac M? GPU
    device = torch.device("mps")
elif torch.cuda.is_available():  # Nvidia GPU
    device = torch.device("cuda")
else:  # CPU
    device = torch.device("cpu")
print(device)


cpu


In [74]:
def get_contextual_embeddings(text, model_name):
    batch_size = 16
    tokenizer = AutoTokenizer.from_pretrained(model_name) #"roberta-base"
    model = AutoModel.from_pretrained(model_name)

    tensor_list=[]
    for i in range(0, len(text), batch_size):
        batch = text[i:i+batch_size]

        input = tokenizer(batch, padding="max_length", max_length = 100, truncation=True, return_tensors="pt")
        model.eval()
        model.to(device)
        input = input.to(device)
        with torch.no_grad():
          outputs = model(**input)
          encoded_layers = outputs[0]
          cls_vector = encoded_layers[:,0,:]

        tensor_list.append(cls_vector)
    cls_vector = torch.cat(tensor_list).cpu()
    return cls_vector


# LSA based on TF-IDF of words (100 singular values)
def LSA_TF_IDF_repre(data, model_name, lang):
    if lang == "english":
        stop_words = "english"
    elif lang == "spanish":
        stop_words = stopwords.words("spanish")
    else:
        stop_words = None

    tfidf_vectorizer = TfidfVectorizer(stop_words = stop_words, binary=False, use_idf=True, preprocessor=None)
    tfidf_matrix = tfidf_vectorizer.fit_transform(data)

    num_features = tfidf_matrix.shape[1]
    n_components = min(100, num_features)

    svd = TruncatedSVD(n_components=n_components)
    svd_matrix = svd.fit_transform(tfidf_matrix)

    return svd_matrix

In [62]:
def get_repre(train, test, method, model_name, task, lang, sample_size = -1):
    train_data1 = unpack(train, task)
    test_data1 = unpack(test, task)

    if sample_size != -1:
      for data in [train_data1, test_data1]:
          for key, valie in data.items():
            data[key] = data[key][:sample_size]

    train_data1["repre"] = method(train_data1["sptext"], model_name, lang)
    test_data1["repre"] = method(test_data1["sptext"], model_name, lang)

    return train_data1, test_data1

In [None]:
train_con_embed, test_con_embed = get_repre(EnTrainTask1, EnDevTask1,
                                      get_contextual_embeddings, "roberta-base",
                                            "task1","english", -1)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
train_lsa_repre, test_lsa_repre = get_repre(EnTrainTask1, EnDevTask1,
                                      LSA_TF_IDF_repre, "",
                                      "task1","english", -1)

## Learning Models - subtask 1

In [95]:
def log_reg(x_train, y_train, x_dev, y_dev):
    clf1 = LogisticRegression(
      penalty='l2',
      C=1.0,
      solver='liblinear', #'saga' 'l1'
      max_iter=200
    )
    clf1.fit(x_train, y_train)
    predicted1 = clf1.predict(x_dev)

    f1_positive = f1_score(y_dev, predicted1, pos_label=1)
    print(f"F1-score (Positive Class): {f1_positive}")

    report = classification_report(y_dev,predicted1, digits=4)
    print(report)

def decision_tree_sub1(X_train, y_train, X_dev, y_dev):
    clf = DecisionTreeClassifier(random_state=42)
    clf.fit(X_train, y_train)
    predicted = clf.predict(X_dev)

    f1_positive = f1_score(y_dev, predicted, pos_label=1)
    print(f"F1-score (Positive Class): {f1_positive}")

    report = classification_report(y_dev,predicted, digits=4)
    print(report)

def MLP_sub1(X_train, y_train, X_dev, y_dev):
    clf = MLPClassifier(random_state = 1,
                        hidden_layer_sizes = (256, 128),
                        activation='relu',
                        max_iter = 1500,
                        learning_rate_init = 0.0005,
                        alpha=0.0001,
                        early_stopping=True,
                        solver='lbfgs') # adam

    clf.fit(X_train, y_train)
    predicted = clf.predict(X_dev)

    f1_positive = f1_score(y_dev, predicted, pos_label=1)
    print(f"F1-score (Positive Class): {f1_positive}")

    report = classification_report(y_dev, predicted, digits=4)
    print(report)


In [None]:
def train(train_data, test_data, method):
    method(train_data["repre"], train_data["label"],
           test_data["repre"], test_data["label"])


# Subtask 1 - Results - English

In [None]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: Logistic regression")
train(train_lsa_repre, test_lsa_repre, log_reg)

Representation: LSA based on TD-IDF with 50 components
Clasiffier: Logistic regression
F1-score (Positive Class): 0.5469613259668509
              precision    recall  f1-score   support

           0     0.6558    0.7240    0.6882       250
           1     0.5893    0.5103    0.5470       194

    accuracy                         0.6306       444
   macro avg     0.6225    0.6172    0.6176       444
weighted avg     0.6267    0.6306    0.6265       444



In [None]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: Logistic regression")
train(train_con_embed, test_con_embed, log_reg)

Representation: Contextual embeddings using RoBERTa
Clasiffier: Logistic regression
F1-score (Positive Class): 0.7648725212464589
              precision    recall  f1-score   support

           0     0.7930    0.9040    0.8449       250
           1     0.8491    0.6959    0.7649       194

    accuracy                         0.8131       444
   macro avg     0.8210    0.7999    0.8049       444
weighted avg     0.8175    0.8131    0.8099       444



In [None]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: Decison Tree")
train(train_lsa_repre, test_lsa_repre, decision_tree_sub1)

Representation: LSA based on TD-IDF with 50 components
Clasiffier: Decison Tree
F1-score (Positive Class): 0.4824120603015075
              precision    recall  f1-score   support

           0     0.5917    0.5680    0.5796       250
           1     0.4706    0.4948    0.4824       194

    accuracy                         0.5360       444
   macro avg     0.5311    0.5314    0.5310       444
weighted avg     0.5388    0.5360    0.5371       444



In [None]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: Decison Tree")
train(train_con_embed, test_con_embed, decision_tree_sub1)

Representation: Contextual embeddings using RoBERTa
Clasiffier: Decison Tree
F1-score (Positive Class): 0.6106870229007634
              precision    recall  f1-score   support

           0     0.6980    0.6840    0.6909       250
           1     0.6030    0.6186    0.6107       194

    accuracy                         0.6554       444
   macro avg     0.6505    0.6513    0.6508       444
weighted avg     0.6565    0.6554    0.6559       444



In [96]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: MLP")
train(train_lsa_repre, test_lsa_repre, MLP_sub1)

Representation: LSA based on TD-IDF with 50 components
Clasiffier: MLP
F1-score (Positive Class): 0.5055762081784386
              precision    recall  f1-score   support

           0     0.4131    0.3843    0.3982       229
           1     0.4910    0.5211    0.5056       261

    accuracy                         0.4571       490
   macro avg     0.4521    0.4527    0.4519       490
weighted avg     0.4546    0.4571    0.4554       490



In [97]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: MLP")
train(train_con_embed, test_con_embed, MLP_sub1)

Representation: Contextual embeddings using RoBERTa
Clasiffier: MLP
F1-score (Positive Class): 0.7423076923076923
              precision    recall  f1-score   support

           0     0.7056    0.7118    0.7087       229
           1     0.7452    0.7395    0.7423       261

    accuracy                         0.7265       490
   macro avg     0.7254    0.7256    0.7255       490
weighted avg     0.7267    0.7265    0.7266       490



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


# Learning Models - Subtask 2 - English

In [None]:
train_con_embed2, test_con_embed2 = get_repre(EnTrainTask2, EnDevTask2,
                                      get_contextual_embeddings, "roberta-base",
                                            "task2", "english", -1)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
train_lsa_repre2, test_lsa_repre2 = get_repre(EnTrainTask2, EnDevTask2,
                                      LSA_TF_IDF_repre, "",
                                      "task2", "english", -1)

In [92]:
def log_reg_sub2(x_train, y_train, x_dev, y_dev):
    clf = LogisticRegression(
      penalty='l2',
      C=1.0,
      solver='liblinear', #'saga' 'l1'
      max_iter=200
    )
    clf.fit(x_train, y_train)
    predicted = clf.predict(x_dev)

    f1_macro = f1_score(y_dev, predicted, average='macro')
    print(f"F1-score (Macro-Averaged): {f1_macro}")

    report = classification_report(y_dev,predicted, digits=4)
    print(report)


def decision_tree_sub2(X_train, y_train, X_dev, y_dev):
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    predicted = clf.predict(X_dev)

    f1_macro = f1_score(y_dev, predicted, average='macro')
    print(f"F1-score (Macro-Averaged): {f1_macro}")

    report = classification_report(y_dev, predicted, digits=4)
    print(report)


def MLP_sub2(X_train, y_train, X_dev, y_dev):
    clf = MLPClassifier(random_state = 1,
                        max_iter = 1500,
                        learning_rate_init = 0.0005,
                        early_stopping=True,
                        solver='lbfgs')
    clf.fit(X_train, y_train)
    predicted = clf.predict(X_dev)

    f1_macro = f1_score(y_dev, predicted, average='macro')
    print(f"F1-score (Macro-Averaged): {f1_macro}")

    report = classification_report(y_dev, predicted, digits=4)
    print(report)



# Subtask 2 - Results - English

In [None]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: Logistic regression")
train(train_lsa_repre2, test_lsa_repre2, log_reg_sub2)


Representation: LSA based on TD-IDF with 50 components
Clasiffier: Logistic regression
F1-score (Macro-Averaged): 0.24277660324171949
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        28
           1     0.1000    0.0303    0.0465        33
           2     0.5556    0.8824    0.6818        85

    accuracy                         0.5205       146
   macro avg     0.2185    0.3042    0.2428       146
weighted avg     0.3460    0.5205    0.4075       146



In [None]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: Logistic regression")
train(train_con_embed2, test_con_embed2, log_reg_sub2)

Representation: Contextual embeddings using RoBERTa
Clasiffier: Logistic regression
F1-score (Macro-Averaged): 0.43342670401493927
              precision    recall  f1-score   support

           0     0.5000    0.1071    0.1765        28
           1     0.5333    0.2424    0.3333        33
           2     0.6640    0.9765    0.7905        85

    accuracy                         0.6438       146
   macro avg     0.5658    0.4420    0.4334       146
weighted avg     0.6030    0.6438    0.5694       146



In [None]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: Decison Tree")
train(train_lsa_repre2, test_lsa_repre2, decision_tree_sub2)

Representation: LSA based on TD-IDF with 50 components
Clasiffier: Decison Tree
F1-score (Macro-Averaged): 0.377480054192383
              precision    recall  f1-score   support

           0     0.3243    0.4286    0.3692        28
           1     0.1750    0.2121    0.1918        33
           2     0.6377    0.5176    0.5714        85

    accuracy                         0.4315       146
   macro avg     0.3790    0.3861    0.3775       146
weighted avg     0.4730    0.4315    0.4468       146



In [None]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: Decison Tree")
train(train_con_embed2, test_con_embed2, decision_tree_sub2)

Representation: Contextual embeddings using RoBERTa
Clasiffier: Decison Tree
F1-score (Macro-Averaged): 0.4150762281197064
              precision    recall  f1-score   support

           0     0.2927    0.4286    0.3478        28
           1     0.3182    0.2121    0.2545        33
           2     0.6506    0.6353    0.6429        85

    accuracy                         0.5000       146
   macro avg     0.4205    0.4253    0.4151       146
weighted avg     0.5068    0.5000    0.4985       146



In [94]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: MLP")
train(train_lsa_repre2, test_lsa_repre2, MLP_sub2)

Representation: LSA based on TD-IDF with 50 components
Clasiffier: MLP
F1-score (Macro-Averaged): 0.33407578084997436
              precision    recall  f1-score   support

           0     0.1786    0.1786    0.1786        28
           1     0.2941    0.1515    0.2000        33
           2     0.5743    0.6824    0.6237        85

    accuracy                         0.4658       146
   macro avg     0.3490    0.3375    0.3341       146
weighted avg     0.4351    0.4658    0.4425       146



In [93]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: MLP")
train(train_con_embed2, test_con_embed2, MLP_sub2)

Representation: Contextual embeddings using RoBERTa
Clasiffier: MLP
F1-score (Macro-Averaged): 0.5433364468634179
              precision    recall  f1-score   support

           0     0.4444    0.3922    0.4167        51
           1     0.4595    0.4250    0.4416        40
           2     0.7440    0.8017    0.7718       116

    accuracy                         0.6280       207
   macro avg     0.5493    0.5396    0.5433       207
weighted avg     0.6152    0.6280    0.6205       207



# SPANISH

## Tweet representations (Feature extraction)

In [51]:
train_con_embed, test_con_embed = get_repre(SpTrainTask1, SpDevTask1,
                                      get_contextual_embeddings, "PlanTL-GOB-ES/roberta-base-bne",
                                            "task1", "spanish", -1)

tokenizer_config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/851k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/509k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.21M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/957 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
train_lsa_repre, test_lsa_repre = get_repre(SpTrainTask1, SpDevTask1,
                                      LSA_TF_IDF_repre, "",
                                      "task1", "spanish", -1)

## Subtask 1 - Spanish

In [100]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: Logistic regression")
train(train_lsa_repre, test_lsa_repre, log_reg)

Representation: LSA based on TD-IDF with 50 components
Clasiffier: Logistic regression
F1-score (Positive Class): 0.468503937007874
              precision    recall  f1-score   support

           0     0.4156    0.4410    0.4280       229
           1     0.4818    0.4559    0.4685       261

    accuracy                         0.4490       490
   macro avg     0.4487    0.4485    0.4482       490
weighted avg     0.4509    0.4490    0.4496       490



In [54]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: Logistic regression")
train(train_con_embed, test_con_embed, log_reg) #0

Representation: Contextual embeddings using RoBERTa
Clasiffier: Logistic regression
F1-score (Positive Class): 0.8180039138943248
              precision    recall  f1-score   support

           0     0.7833    0.8210    0.8017       229
           1     0.8360    0.8008    0.8180       261

    accuracy                         0.8102       490
   macro avg     0.8097    0.8109    0.8099       490
weighted avg     0.8114    0.8102    0.8104       490



In [98]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: Decison Tree")
train(train_lsa_repre, test_lsa_repre, decision_tree_sub1) #51

Representation: LSA based on TD-IDF with 50 components
Clasiffier: Decison Tree
F1-score (Positive Class): 0.5140712945590994
              precision    recall  f1-score   support

           0     0.4312    0.4105    0.4206       229
           1     0.5037    0.5249    0.5141       261

    accuracy                         0.4714       490
   macro avg     0.4674    0.4677    0.4673       490
weighted avg     0.4698    0.4714    0.4704       490



In [56]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: Decison Tree")
train(train_con_embed, test_con_embed, decision_tree_sub1) #42

Representation: Contextual embeddings using RoBERTa
Clasiffier: Decison Tree
F1-score (Positive Class): 0.6859344894026975
              precision    recall  f1-score   support

           0     0.6422    0.6507    0.6464       229
           1     0.6899    0.6820    0.6859       261

    accuracy                         0.6673       490
   macro avg     0.6661    0.6663    0.6662       490
weighted avg     0.6676    0.6673    0.6675       490



In [99]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: MLP")
train(train_lsa_repre, test_lsa_repre, MLP_sub1) #50 50

Representation: LSA based on TD-IDF with 50 components
Clasiffier: MLP
F1-score (Positive Class): 0.5055762081784386
              precision    recall  f1-score   support

           0     0.4131    0.3843    0.3982       229
           1     0.4910    0.5211    0.5056       261

    accuracy                         0.4571       490
   macro avg     0.4521    0.4527    0.4519       490
weighted avg     0.4546    0.4571    0.4554       490



In [101]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: MLP")
train(train_con_embed, test_con_embed, MLP_sub1) #38 74


Representation: Contextual embeddings using RoBERTa
Clasiffier: MLP
F1-score (Positive Class): 0.7423076923076923
              precision    recall  f1-score   support

           0     0.7056    0.7118    0.7087       229
           1     0.7452    0.7395    0.7423       261

    accuracy                         0.7265       490
   macro avg     0.7254    0.7256    0.7255       490
weighted avg     0.7267    0.7265    0.7266       490



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


# Subtask 2 - Spanish

In [59]:
train_con_embed2_sp, test_con_embed2_sp = get_repre(SpTrainTask2, SpDevTask2,
                                      get_contextual_embeddings, "PlanTL-GOB-ES/roberta-base-bne",
                                            "task2", "spanish", -1)

Some weights of RobertaModel were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [75]:
train_lsa_repre2_sp, test_lsa_repre2_sp = get_repre(SpTrainTask2, SpDevTask2,
                                      LSA_TF_IDF_repre, "",
                                      "task2", "spanish", -1)

In [76]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: Logistic regression")
train(train_lsa_repre2_sp, test_lsa_repre2_sp, log_reg_sub2)

Representation: LSA based on TD-IDF with 50 components
Clasiffier: Logistic regression
F1-score (Macro-Averaged): 0.27550898807395796
              precision    recall  f1-score   support

           0     0.2727    0.0588    0.0968        51
           1     0.1667    0.0250    0.0435        40
           2     0.5526    0.9052    0.6863       116

    accuracy                         0.5266       207
   macro avg     0.3307    0.3297    0.2755       207
weighted avg     0.4091    0.5266    0.4168       207



In [79]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: Logistic regression")
train(train_con_embed2_sp, test_con_embed2_sp, log_reg_sub2)

Representation: Contextual embeddings using RoBERTa
Clasiffier: Logistic regression
F1-score (Macro-Averaged): 0.5103891766617198
              precision    recall  f1-score   support

           0     0.5238    0.2157    0.3056        51
           1     0.5161    0.4000    0.4507        40
           2     0.6774    0.9052    0.7749       116

    accuracy                         0.6377       207
   macro avg     0.5725    0.5070    0.5104       207
weighted avg     0.6084    0.6377    0.5966       207



In [78]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: Decison Tree")
train(train_lsa_repre2_sp, test_lsa_repre2_sp, decision_tree_sub2)

Representation: LSA based on TD-IDF with 50 components
Clasiffier: Decison Tree
F1-score (Macro-Averaged): 0.3331608589849728
              precision    recall  f1-score   support

           0     0.2222    0.1961    0.2083        51
           1     0.1905    0.3000    0.2330        40
           2     0.6061    0.5172    0.5581       116

    accuracy                         0.3961       207
   macro avg     0.3396    0.3378    0.3332       207
weighted avg     0.4312    0.3961    0.4091       207



In [80]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: Decison Tree")
train(train_con_embed2_sp, test_con_embed2_sp, decision_tree_sub2)

Representation: Contextual embeddings using RoBERTa
Clasiffier: Decison Tree
F1-score (Macro-Averaged): 0.45842812158601626
              precision    recall  f1-score   support

           0     0.4091    0.3529    0.3789        51
           1     0.2941    0.3750    0.3297        40
           2     0.6786    0.6552    0.6667       116

    accuracy                         0.5266       207
   macro avg     0.4606    0.4610    0.4584       207
weighted avg     0.5379    0.5266    0.5307       207



In [102]:
print("Representation: LSA based on TD-IDF with 50 components")
print("Clasiffier: MLP")
train(train_lsa_repre2_sp, test_lsa_repre2_sp, MLP_sub2)

Representation: LSA based on TD-IDF with 50 components
Clasiffier: MLP
F1-score (Macro-Averaged): 0.3324342324342324
              precision    recall  f1-score   support

           0     0.3077    0.1569    0.2078        51
           1     0.1892    0.1750    0.1818        40
           2     0.5486    0.6810    0.6077       116

    accuracy                         0.4541       207
   macro avg     0.3485    0.3376    0.3324       207
weighted avg     0.4198    0.4541    0.4269       207



In [103]:
print("Representation: Contextual embeddings using RoBERTa")
print("Clasiffier: MLP")
train(train_con_embed2_sp, test_con_embed2_sp, MLP_sub2)  #54

Representation: Contextual embeddings using RoBERTa
Clasiffier: MLP
F1-score (Macro-Averaged): 0.5433364468634179
              precision    recall  f1-score   support

           0     0.4444    0.3922    0.4167        51
           1     0.4595    0.4250    0.4416        40
           2     0.7440    0.8017    0.7718       116

    accuracy                         0.6280       207
   macro avg     0.5493    0.5396    0.5433       207
weighted avg     0.6152    0.6280    0.6205       207

