In [78]:
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.dummy import DummyClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB

In [79]:
## Read csv file
df = pd.read_csv('train_all_tasks.csv')

In [80]:
## Data pre-processing

In [81]:
# text and labels for task A
X_train_a, X_test_a, y_train_a, y_test_a = train_test_split(df['text'], df['label_sexist'], test_size=0.3, random_state=0)
X_test_a, X_dev_a, y_test_a, y_dev_a = train_test_split(X_test_a, y_test_a, test_size=0.33)

# text and labels for task B
X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(df['text'], df['label_category'], test_size=0.3, random_state=0)
X_test_b, X_dev_b, y_test_b, y_dev_b = train_test_split(X_test_b, y_test_b, test_size=0.33)

In [82]:
print(Counter(y_train_a))
print(Counter(y_dev_a))
print(Counter(y_test_a))

print(Counter(y_train_b))
print(Counter(y_dev_b))
print(Counter(y_test_b))

Counter({'not sexist': 7413, 'sexist': 2387})
Counter({'not sexist': 1053, 'sexist': 333})
Counter({'not sexist': 2136, 'sexist': 678})
Counter({'none': 7413, '2. derogation': 1119, '3. animosity': 808, '4. prejudiced discussions': 236, '1. threats, plans to harm and incitement': 224})
Counter({'none': 1048, '2. derogation': 155, '3. animosity': 119, '1. threats, plans to harm and incitement': 33, '4. prejudiced discussions': 31})
Counter({'none': 2141, '2. derogation': 316, '3. animosity': 238, '4. prejudiced discussions': 66, '1. threats, plans to harm and incitement': 53})


In [83]:
def report(model_name, ydev, pred, zero_division=1):
    print("\n=== {0} ===\n\nClassification Report:\n{1}"
          "\nConfusion Matrix:\n{2}".format(model_name,
                                            classification_report(ydev, pred, 
                                                                  zero_division=zero_division),
                                            confusion_matrix(ydev, pred)))

In [85]:
def naive_bayes(xtrain, ytrain, xdev, ydev, le):
  model_name = "naive bayes"
  model = MultinomialNB()
  vec = CountVectorizer()

  vec.fit(xtrain, xdev)
  xtrain_enc = vec.transform(xtrain)
  xdev_enc = vec.transform(xdev)

  model.fit(xtrain_enc, ytrain)
  pred = model.predict(xdev_enc)
  pred = le.inverse_transform(pred)

  report(model_name, ydev, pred)

In [84]:
def svm_train(xtrain, ytrain, xdev, ydev, le):
  model_name = "SVM"
  model = SVC(kernel='linear')
  vec = CountVectorizer()

  vec.fit(xtrain, xdev)
  xtrain_enc = vec.transform(xtrain)
  xdev_enc = vec.transform(xdev)

  model.fit(xtrain_enc, ytrain)
  pred = model.predict(xdev_enc)
  pred = le.inverse_transform(pred)

  report(model_name, ydev, pred)

In [86]:
def majority_class(xtrain, ytrain, xdev, ydev, le):
  model_name = "majority class"
  model = DummyClassifier()
  vec = CountVectorizer()

  vec.fit(xtrain, xdev)
  xtrain_enc = vec.transform(xtrain)
  xdev_enc = vec.transform(xdev)

  model.fit(xtrain_enc, ytrain)
  pred = model.predict(xdev_enc)
  pred = le.inverse_transform(pred)

  report(model_name, ydev, pred, zero_division=0)

In [87]:
def decision_tree_train(xtrain, ytrain, xdev, ydev, le):
  model_name = "decision tree"
  model = DecisionTreeClassifier()
  vec = CountVectorizer()

  vec.fit(xtrain, xdev)
  xtrain_enc = vec.transform(xtrain)
  xdev_enc = vec.transform(xdev)

  model.fit(xtrain_enc, ytrain)
  pred = model.predict(xdev_enc)
  pred = le.inverse_transform(pred)

  report(model_name, ydev, pred)

In [88]:
## Running Task A (Binary) 

In [89]:
le = LabelEncoder()

le.fit(y_train_a)
y_train_a = le.transform(y_train_a)

In [90]:
naive_bayes(X_train_a, y_train_a, X_test_a, y_test_a, le)


=== naive bayes ===

Classification Report:
              precision    recall  f1-score   support

  not sexist       0.82      0.95      0.88      2136
      sexist       0.71      0.35      0.47       678

    accuracy                           0.81      2814
   macro avg       0.76      0.65      0.67      2814
weighted avg       0.79      0.81      0.78      2814

Confusion Matrix:
[[2039   97]
 [ 443  235]]


In [91]:
majority_class(X_train_a, y_train_a, X_test_a, y_test_a, le)


=== majority class ===

Classification Report:
              precision    recall  f1-score   support

  not sexist       0.76      1.00      0.86      2136
      sexist       0.00      0.00      0.00       678

    accuracy                           0.76      2814
   macro avg       0.38      0.50      0.43      2814
weighted avg       0.58      0.76      0.66      2814

Confusion Matrix:
[[2136    0]
 [ 678    0]]


In [92]:
svm_train(X_train_a, y_train_a, X_test_a, y_test_a, le)


=== SVM ===

Classification Report:
              precision    recall  f1-score   support

  not sexist       0.86      0.89      0.88      2136
      sexist       0.61      0.55      0.58       678

    accuracy                           0.81      2814
   macro avg       0.74      0.72      0.73      2814
weighted avg       0.80      0.81      0.80      2814

Confusion Matrix:
[[1901  235]
 [ 306  372]]


In [93]:
decision_tree_train(X_train_a, y_train_a, X_test_a, y_test_a, le)


=== decision tree ===

Classification Report:
              precision    recall  f1-score   support

  not sexist       0.85      0.88      0.86      2136
      sexist       0.57      0.50      0.53       678

    accuracy                           0.79      2814
   macro avg       0.71      0.69      0.70      2814
weighted avg       0.78      0.79      0.78      2814

Confusion Matrix:
[[1882  254]
 [ 338  340]]


In [94]:
## Running Task B (Multi-class) 

In [95]:
le = LabelEncoder()

le.fit(y_train_b)
y_train_b = le.transform(y_train_b)

In [96]:
naive_bayes(X_train_b, y_train_b, X_test_b, y_test_b, le)


=== naive bayes ===

Classification Report:
                                          precision    recall  f1-score   support

1. threats, plans to harm and incitement       1.00      0.00      0.00        53
                           2. derogation       0.55      0.11      0.19       316
                            3. animosity       0.33      0.02      0.04       238
               4. prejudiced discussions       1.00      0.00      0.00        66
                                    none       0.78      0.99      0.87      2141

                                accuracy                           0.77      2814
                               macro avg       0.73      0.23      0.22      2814
                            weighted avg       0.72      0.77      0.69      2814

Confusion Matrix:
[[   0    0    0    0   53]
 [   0   36    6    0  274]
 [   0   20    5    0  213]
 [   0    0    0    0   66]
 [   0    9    4    0 2128]]


In [97]:
majority_class(X_train_b, y_train_b, X_test_b, y_test_b, le)


=== majority class ===

Classification Report:
                                          precision    recall  f1-score   support

1. threats, plans to harm and incitement       0.00      0.00      0.00        53
                           2. derogation       0.00      0.00      0.00       316
                            3. animosity       0.00      0.00      0.00       238
               4. prejudiced discussions       0.00      0.00      0.00        66
                                    none       0.76      1.00      0.86      2141

                                accuracy                           0.76      2814
                               macro avg       0.15      0.20      0.17      2814
                            weighted avg       0.58      0.76      0.66      2814

Confusion Matrix:
[[   0    0    0    0   53]
 [   0    0    0    0  316]
 [   0    0    0    0  238]
 [   0    0    0    0   66]
 [   0    0    0    0 2141]]


In [98]:
svm_train(X_train_b, y_train_b, X_test_b, y_test_b, le)


=== SVM ===

Classification Report:
                                          precision    recall  f1-score   support

1. threats, plans to harm and incitement       0.14      0.15      0.15        53
                           2. derogation       0.37      0.36      0.36       316
                            3. animosity       0.37      0.25      0.30       238
               4. prejudiced discussions       0.23      0.14      0.17        66
                                    none       0.86      0.90      0.88      2141

                                accuracy                           0.75      2814
                               macro avg       0.39      0.36      0.37      2814
                            weighted avg       0.73      0.75      0.74      2814

Confusion Matrix:
[[   8    5    5    1   34]
 [  10  114   39    8  145]
 [  11   70   60    5   92]
 [   0    4    6    9   47]
 [  27  116   52   16 1930]]


In [99]:
decision_tree_train(X_train_b, y_train_b, X_test_b, y_test_b, le)


=== decision tree ===

Classification Report:
                                          precision    recall  f1-score   support

1. threats, plans to harm and incitement       0.08      0.06      0.07        53
                           2. derogation       0.35      0.29      0.32       316
                            3. animosity       0.35      0.29      0.32       238
               4. prejudiced discussions       0.17      0.11      0.13        66
                                    none       0.85      0.91      0.88      2141

                                accuracy                           0.75      2814
                               macro avg       0.36      0.33      0.34      2814
                            weighted avg       0.72      0.75      0.73      2814

Confusion Matrix:
[[   3   11    5    0   34]
 [   3   93   61    5  154]
 [   6   52   69    5  106]
 [   1    6    9    7   43]
 [  24  104   52   23 1938]]
