In [None]:
# Colab-only setup: mount Google Drive at /content/drive so files stored there
# can be accessed from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch the working directory to the project's core/ folder on the mounted Drive.
# NOTE(review): presumably required so the local `config`, `utils` and `models`
# modules imported in the next cell can be found - confirm.
%cd /content/drive/MyDrive/AggDetect/core

/content/drive/MyDrive/AggDetect/core


In [None]:
# File for giving test predictions
# Validation
import config
import utils
import joblib
from sklearn import metrics
import xgboost
import pandas as pd
import models
import seaborn as sns

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
def test(
    clf_pipe,
    task_name,
    target_map,
    string_cleaner,
    seed=0,
    verbose=True,
    flatten=True,
):
    """Run a fitted pipeline over the held-out test CSV of one task.

    Args:
        clf_pipe: Fitted estimator/pipeline exposing ``predict``.
        task_name: ``'A'`` selects ``config.TEST_TASK_A``; any other value
            selects ``config.TEST_TASK_B``. Also forwarded to
            ``utils.get_clean_dataset``.
        target_map: Label mapping forwarded to ``utils.get_clean_dataset``.
        string_cleaner: Text-cleaning callable forwarded to
            ``utils.get_clean_dataset``.
        seed: Seed forwarded to ``utils.get_clean_dataset``.
        verbose: When true, print a progress line naming the task.
        flatten: Forwarded to ``utils.reshape_training_data``.

    Returns:
        Tuple ``(X_val, y_true, y_preds)``: the reshaped inputs given to the
        pipeline, the labels returned by ``utils.get_clean_dataset``, and the
        pipeline's predictions. No score is computed here.
    """
    if verbose:
        print(f"Testing...{task_name}")

    # Pick the test file: task 'A' has its own, everything else uses task B's.
    if task_name == 'A':
        csv_path = config.TEST_TASK_A
    else:
        csv_path = config.TEST_TASK_B

    raw_frame = pd.read_csv(csv_path)

    # Clean in evaluation mode (train=False) and keep the file's row order.
    cleaned_inputs, y_true = utils.get_clean_dataset(
        raw_frame,
        target_map,
        train=False,
        task_name=task_name,
        string_cleaner=string_cleaner,
        seed=seed,
        shuffle=False,
    )

    # Bring the inputs into the layout the pipeline is fed with.
    model_inputs = utils.reshape_training_data(cleaned_inputs, flatten=flatten)
    y_preds = clf_pipe.predict(model_inputs)

    return model_inputs, y_true, y_preds
    

In [None]:
# Script Run: evaluate the saved Task A and Task B models on their test sets.
if __name__ == '__main__':
    # Seed everything first so the evaluation below is repeatable.
    utils.seed_all(config.RANDOM_SEED)

    # Keyword arguments shared by both evaluation runs.
    shared_kwargs = dict(seed=config.RANDOM_SEED, verbose=True)

    # Task A: load the saved pipeline, then evaluate it.
    task_1_pipe = joblib.load(config.MODEL_SAVE_PATH + 'TASK_A_model.pkl')
    X_val_1, Y_true_1, Y_preds_1 = test(
        task_1_pipe, 'A', config.TASK_A_MAP, utils.clean_one_text, **shared_kwargs
    )

    # Task B: same procedure with the second model and label map.
    task_2_pipe = joblib.load(config.MODEL_SAVE_PATH + 'TASK_B_model.pkl')
    X_val_2, Y_true_2, Y_preds_2 = test(
        task_2_pipe, 'B', config.TASK_B_MAP, utils.clean_one_text, **shared_kwargs
    )

Testing...A
Testing...B


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import classification_report

In [None]:
# Confusion matrices for both tasks (m1: first task, m2: second task).
m1 = confusion_matrix(y_true=Y_true_1, y_pred=Y_preds_1)
m2 = confusion_matrix(y_true=Y_true_2, y_pred=Y_preds_2)
for matrix in (m1, m2):
    print(matrix)

# Echo the inputs, true labels and predictions of each task for a quick look.
for inputs_labels_preds in (
    (X_val_1, Y_true_1, Y_preds_1),
    (X_val_2, Y_true_2, Y_preds_2),
):
    print(*inputs_labels_preds)


[[569  74  47]
 [ 52 112  60]
 [ 35  55 196]]
[[908 117]
 [ 69 106]]
['httpswwwyoutubecomwatchv 4iejjszkflo'
 'shame societi cultur tri justifi desir live lifestyl choic against human'
 'wanna say no gay gene truli serious cure accept jesu savior gay agenda everywher tv sublimin convinc interest gender oh also demon spiritu well keep dark prevail'
 ... "that' law call ass" 'feminist definit equal whatev benefit women'
 'expens whatev'] [0 0 0 ... 0 0 0] [0 1 1 ... 1 2 0]
['httpswwwyoutubecomwatchv 4iejjszkflo'
 'shame societi cultur tri justifi desir live lifestyl choic against human'
 'wanna say no gay gene truli serious cure accept jesu savior gay agenda everywher tv sublimin convinc interest gender oh also demon spiritu well keep dark prevail'
 ... "that' law call ass" 'feminist definit equal whatev benefit women'
 'expens whatev'] [0 0 0 ... 0 0 0] [0 0 1 ... 0 0 0]


In [None]:
def plot_confusion_heatmap(matrix, cmap, title):
    """Draw one annotated confusion-matrix heatmap on its own figure.

    Args:
        matrix: Confusion matrix as produced by
            ``sklearn.metrics.confusion_matrix`` (rows are true labels,
            columns are predicted labels).
        cmap: Colormap name passed to ``sns.heatmap``.
        title: Title shown above the heatmap.

    Returns:
        The matplotlib ``Axes`` the heatmap was drawn on.
    """
    _, ax = plt.subplots(figsize=(8, 8), dpi=400)
    sns.heatmap(
        matrix,
        cmap=cmap,
        annot=True,
        fmt='.0f',  # counts are whole numbers; show them without decimals
        annot_kws={'fontsize': 16, 'fontweight': 'bold'},
        linewidths=1,
        linecolor='white',
        ax=ax,
    )
    # Label the axes so the figure can be read on its own.
    ax.set(title=title, xlabel='Predicted label', ylabel='True label')
    return ax


plot_confusion_heatmap(m1, 'Reds', 'Task A (aggression): confusion matrix')
plot_confusion_heatmap(m2, 'Blues', 'Task B (misogyny): confusion matrix')
plt.show()

In [None]:
# Per-class precision/recall/F1, first task then second task.
for y_true, y_pred in ((Y_true_1, Y_preds_1), (Y_true_2, Y_preds_2)):
    print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.87      0.82      0.85       690
           1       0.46      0.50      0.48       224
           2       0.65      0.69      0.67       286

    accuracy                           0.73      1200
   macro avg       0.66      0.67      0.66      1200
weighted avg       0.74      0.73      0.73      1200

              precision    recall  f1-score   support

           0       0.93      0.89      0.91      1025
           1       0.48      0.61      0.53       175

    accuracy                           0.84      1200
   macro avg       0.70      0.75      0.72      1200
weighted avg       0.86      0.84      0.85      1200



In [None]:
# Integer class codes -> label names for each task.
AGGRESSION_LABELS = {0: "NAG", 1: "CAG", 2: "OAG"}
MISOGYNY_LABELS = {0: "NGEN", 1: "GEN"}


def decode_labels(encoded, names):
    """Translate integer class codes into their label names.

    Args:
        encoded: Iterable of class codes (true labels or predictions).
        names: Dict mapping each valid class code to its label name.

    Returns:
        A list of label names with exactly one entry per input code, in order.

    Raises:
        ValueError: If a code is not a key of ``names``. Skipping such a code
            would shorten the result and shift every later row when the
            decoded lists are paired with the comments afterwards.
    """
    decoded = []
    for position, code in enumerate(encoded):
        try:
            decoded.append(names[code])
        except KeyError:
            raise ValueError(
                f"Unknown class code {code!r} at position {position}; "
                f"expected one of {sorted(names)}"
            ) from None
    return decoded


true_list_1 = decode_labels(Y_true_1, AGGRESSION_LABELS)
true_list_2 = decode_labels(Y_true_2, MISOGYNY_LABELS)
preds_list_1 = decode_labels(Y_preds_1, AGGRESSION_LABELS)
preds_list_2 = decode_labels(Y_preds_2, MISOGYNY_LABELS)

print(Y_true_1, true_list_1)
print(Y_true_2, true_list_2)
print(Y_preds_1, preds_list_1)
print(Y_preds_2, preds_list_2)

[0 0 0 ... 0 0 0] ['NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'CAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'OAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'OAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', 'NAG', '

In [None]:
# Assemble one row per comment: text, then true/predicted labels of both tasks.
# NOTE(review): rows are paired purely by position and the text comes from the
# Task A inputs only - this assumes both test sets list the same comments in
# the same order; confirm.
export_columns = {
    'Text Comments': X_val_1,
    'Aggression Intent': true_list_1,
    'Aggression Prediction': preds_list_1,
    'Misogyny Intent': true_list_2,
    'Misogyny Prediction': preds_list_2,
}

# zip() would silently stop at the shortest input, so refuse mismatched lengths
# instead of exporting a truncated, misaligned table.
column_lengths = {name: len(values) for name, values in export_columns.items()}
if len(set(column_lengths.values())) > 1:
    raise ValueError(
        f"Cannot pair results row by row; column lengths differ: {column_lengths}"
    )

rows = list(zip(*export_columns.values()))
df = pd.DataFrame(rows, columns=list(export_columns))
df.to_csv('AggMisDetectionTest.csv')