In [1]:
import warnings
warnings.filterwarnings( 'ignore' )
import gc
import os
import time
import numpy as np
import pandas as pd
import pickle

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

import tensorflow as tf
import keras.backend as K
from keras.models import load_model
from keras.preprocessing import text, sequence


from tqdm import tqdm_notebook
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.util import ngrams

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


import warnings
warnings.filterwarnings( 'ignore' )

from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
#from thundersvm import SVC
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score


from keras.models import Model
from keras.layers import Input, Embedding
from keras.layers import CuDNNGRU, CuDNNLSTM, Conv1D, Conv2D, Dense, Bidirectional, GRU, LSTM, MaxPool1D
from keras.layers import SpatialDropout1D, Dropout, Concatenate, concatenate, Softmax, Flatten, Reshape
from keras.layers import GlobalMaxPooling1D, GlobalAveragePooling1D, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.utils import multi_gpu_model
from keras.optimizers import *

Using TensorFlow backend.


from tensorflow.random import set_seed
from numpy.random import seed
import random


seed_value = 0
set_seed(seed_value)
seed(seed_value)
os.environ['PYTHONHASHSEED'] = str(seed_value)
random.seed(seed_value)

In [2]:
# hyper parameters for this model

penalty = 'l2'
C = 18
solver = 'newton-cg'
class_weight='balanced'

In [3]:
modelname = 'LR'

modelpath = './Models/' + modelname + '/'

if not os.path.exists( modelpath ):
    os.makedirs( modelpath )
if not os.path.exists( './Results/' ):
    os.makedirs( './Results/' )

In [4]:
def hms_string(sec_elapsed):
    h = int(sec_elapsed / (60 * 60))
    m = int((sec_elapsed % (60 * 60)) / 60)
    s = sec_elapsed % 60
    return "{}:{:>02}:{:>05.2f}".format(h, m, s)

In [5]:
def convert_lower_case(data):
    # Convert the input text into lowercase text
    return np.char.lower(str(data))

def remove_stop_words(data):
    # Tokenize the input text and remove stopwords from the corpus
    stop_words = stopwords.words('english')
    lemmatizer = WordNetLemmatizer()
    words = word_tokenize(str(data))
    new_text = ""
    for w in words:
        if w not in stop_words and len(w) > 3:
            new_text = new_text + " " + lemmatizer.lemmatize(w)
    return new_text

def remove_punctuation(data):
    # Remove punctuations defined below from input text
    symbols = "!\"#$%&()*+-./:;<=>?@[\]^_`{|}~\n"
    for i in range(len(symbols)):
        data = np.char.replace(data, symbols[i], ' ')
        data = np.char.replace(data, "  ", " ")
    data = np.char.replace(data, ',', '')
    return data

def remove_apostrophe(data):
    # Remove apostrophe from the input text
    return np.char.replace(data, "'", "")

def preprocess(data):
    # Preprocess the input text
    data = convert_lower_case(data)
    data = remove_punctuation(data) #remove comma seperately
    data = remove_apostrophe(data)
    data = remove_stop_words(data)
    return data

def get_tokens(dataframe, column):
    tokens = []
    for i in tqdm_notebook(dataframe[column][:]):
        _tokens = word_tokenize(str(i))
        tokens.append(_tokens)
        
    return tokens

In [6]:
train_data = pd.read_csv('data\\wiki_debias_train.csv')
train_data = train_data.dropna(axis = 0)
#train_data = train_data.sample(n=100000, random_state=0)
train_data['toxicity'] = train_data['toxicity'].round()

df_test = pd.read_csv('test_data.csv')
df_test = df_test.dropna(axis = 0)
df_test.loc[df_test['Label'] == 'BAD', 'Label'] = 1
df_test.loc[df_test['Label'] == 'NOT_BAD', 'Label'] = 0


train_feature = get_tokens(train_data, 'comment')
train_label = train_data['toxicity']

test_feature = get_tokens(df_test, 'Text')
test_label = df_test['Label']

identity_terms = []
for i in tqdm_notebook(range(len(df_test['Text']))):
    _comment = df_test.loc[i,  'Text'].split(" ")
    if len(_comment) < 3:
        _term = _comment[1]
        identity_terms.append(_term)
identity_terms = list(set(identity_terms))


terms = []
for i in range(len(df_test['Text'])):
    _text = df_test.loc[i, 'Text'].split(' ')
    _term = list(set(_text).intersection(set(identity_terms)))
    if len(_term) > 0:
        terms.append(_term[0])
    else:
        terms.append(np.nan)
        
df_test['Identity_Terms'] = terms

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=95692.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=76564.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=76564.0), HTML(value='')))




In [7]:
vectorizer = TfidfVectorizer()
vectorizer.fit( train_data['comment'] )

xtrain = vectorizer.transform( train_data['comment'].values )
xtest = vectorizer.transform( df_test[ 'Text' ].values )
ytrain = train_data[ 'toxicity' ]
ytest = df_test[ 'Label' ]

In [8]:
LR = LogisticRegression( penalty=penalty, C=C, solver=solver, class_weight=class_weight )
LR.fit( xtrain, ytrain )

LogisticRegression(C=18, class_weight='balanced', solver='newton-cg')

In [9]:
LR_pred =  LR.predict(xtrain)
accuracy_score(ytrain, LR_pred)

0.9877314718053756

In [10]:
from sklearn.tree import DecisionTreeClassifier
DT = DecisionTreeClassifier(random_state=0, max_depth = 100)
DT.fit(xtrain, ytrain)

DecisionTreeClassifier(max_depth=100, random_state=0)

In [11]:
DT_pred =  DT.predict(xtrain)
accuracy_score(ytrain, DT_pred)

0.983781298332149

In [12]:
from sklearn.svm import LinearSVC
SVC = LinearSVC(random_state=0, tol=1e-05)
SVC.fit(xtrain, ytrain)

LinearSVC(random_state=0, tol=1e-05)

In [13]:
SVC_pred =  SVC.predict(xtrain)
accuracy_score(ytrain, SVC_pred)

0.9881285791915729

In [14]:
xf_positive = 0
xd_positive = 0
xf_total = 0
xd_total = 0

for i in tqdm_notebook(range(len(train_feature))):
    if(train_data['toxicity'].values[i] == 1 and len(list(set(train_feature[i]).intersection(set(terms)))) > 0):
        xd_positive += 1
        xd_total += 1
    elif(len(list(set(train_feature[i]).intersection(set(terms)))) > 0):
        xd_total += 1
    elif(train_data['toxicity'].values[i] == 1 and len(list(set(train_feature[i]).intersection(set(terms))))==0):
        xf_positive += 1
        xf_total += 1
    elif(len(list(set(train_feature[i]).intersection(set(terms))))== 0):
        xf_total += 1

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=95692.0), HTML(value='')))




In [15]:
pf = xf_positive / xf_total
pd = xd_positive / xd_total
discrimination = pf - pd
discrimination

-0.07876544878242223

In [16]:
pred = []
for i in range(len(LR_pred)):
    _sum = sum([LR_pred[i], SVC_pred[i], DT_pred[i]])
    if _sum >= 2:
        pred.append(int(1))
    else:
        pred.append(int(0))

In [17]:
accuracy_score(train_data['toxicity'].astype(float), pred)

0.992611712577854

In [18]:
xf_positive = 0
xd_positive = 0
xf_total = 0
xd_total = 0

for i in tqdm_notebook(range(len(train_feature))):
    if(pred[i] == 1 and len(list(set(train_feature[i]).intersection(set(terms)))) > 0):
        xd_positive += 1
        xd_total += 1
    elif(len(list(set(train_feature[i]).intersection(set(terms)))) > 0):
        xd_total += 1
    elif(pred[i] == 1 and len(list(set(train_feature[i]).intersection(set(terms))))==0):
        xf_positive += 1
        xf_total += 1
    elif(len(list(set(train_feature[i]).intersection(set(terms))))== 0):
        xf_total += 1

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=95692.0), HTML(value='')))




In [19]:
pf = xf_positive / xf_total
pd = xd_positive / xd_total
discrimination = pf - pd
discrimination

-0.07996873614881377

In [20]:
LR_pred =  LR.predict(xtest)
print(accuracy_score(df_test['Label'].astype(float), LR_pred))

0.7870147850164568


In [21]:
DT_pred =  DT.predict(xtest)
print(accuracy_score(df_test['Label'].astype(float), DT_pred))

0.62180659317695


In [22]:
SVC_pred =  SVC.predict(xtest)
print(accuracy_score(df_test['Label'].astype(float), SVC_pred.round()))

0.7398777493338906


In [23]:
pred = []
for i in range(len(LR_pred)):
    _sum = sum([LR_pred[i], SVC_pred[i], DT_pred[i]])
    if _sum >= 2:
        pred.append(int(1))
    else:
        pred.append(int(0))
        
df_test['prediction_scores'] = pred

In [24]:
accuracy_score(df_test['Label'].astype('float'), pred)

0.7402695783919335

In [25]:
xf_positive = 0
xd_positive = 0
xf_total = 0
xd_total = 0

for i in tqdm_notebook(range(len(test_feature))):
    if(pred[i] == 1 and len(list(set(test_feature[i]).intersection(set(terms)))) > 0):
        xd_positive += 1
        xd_total += 1
    elif(len(list(set(test_feature[i]).intersection(set(terms)))) > 0):
        xd_total += 1
    elif(pred[i] == 1 and len(list(set(test_feature[i]).intersection(set(terms))))==0):
        xf_positive += 1
        xf_total += 1
    elif(len(list(set(test_feature[i]).intersection(set(terms))))== 0):
        xf_total += 1

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=76564.0), HTML(value='')))




In [26]:
pf = xf_positive / xf_total
pd = xd_positive / xd_total
discrimination = pf - pd
discrimination

0.0597518258275056

In [27]:
import pandas as pd
def perf_measure(y_actual, y_hat):
    TP = 0
    FP = 0
    TN = 0
    FN = 0

    for i in range(len(y_hat)): 
        if y_actual[i]==y_hat[i]==1:
           TP += 1
        if y_hat[i]==1 and y_actual[i]!=y_hat[i]:
           FP += 1
        if y_actual[i]==y_hat[i]==0:
           TN += 1
        if y_hat[i]==0 and y_actual[i]!=y_hat[i]:
           FN += 1

    return(TN, FP, FN, TP)



total_tn, total_fp, total_fn, total_tp = confusion_matrix(df_test['Label'].astype(float), pred).ravel()
total_fpr = total_fp / (total_fp + total_tn )
total_fnr = total_fn / (total_fn + total_tp)
false_positive = []
false_negative = []
identity_terms = []
for identity_term in set(terms):
    data = df_test[df_test['Identity_Terms'] == identity_term].reset_index()
    y_true, y_pred = data['Label'].astype(int), data['prediction_scores']
    tn, fp, fn, tp = perf_measure(y_true, y_pred.round())
    try:
        fpr = fp / (fp + tn)
        fnr = fn / (fn + tp)
        false_positive.append(fpr)
        false_negative.append(fnr)
        identity_terms.append(identity_term)
    except:
        print("Error in ", identity_term)

    
eval_scores = pd.DataFrame(identity_terms, columns = ['Identity_Titles'])
eval_scores['Identity_Term_False_Positive'] = false_positive
eval_scores['Total_False_Positive'] = total_fpr
eval_scores['Identity_Term_False_Negatives'] = false_negative
eval_scores['Total_False_Negative'] = total_fnr
eval_scores['FPR - FPRt'] = abs(total_fpr - eval_scores['Identity_Term_False_Positive'])
eval_scores['FNR - FNRt'] = abs(total_fnr - eval_scores['Identity_Term_False_Negatives'])
eval_scores

Error in  nan


Unnamed: 0,Identity_Titles,Identity_Term_False_Positive,Total_False_Positive,Identity_Term_False_Negatives,Total_False_Negative,FPR - FPRt,FNR - FNRt
0,asian,0.0,0.044486,0.546896,0.474975,0.044486,0.07192
1,black,0.044914,0.044486,0.31572,0.474975,0.000428,0.159255
2,younger,0.0,0.044486,0.569353,0.474975,0.044486,0.094378
3,bisexual,0.0,0.044486,0.726552,0.474975,0.044486,0.251577
4,female,0.002642,0.044486,0.457067,0.474975,0.041844,0.017908
5,muslim,0.0,0.044486,0.479524,0.474975,0.044486,0.004549
6,latinx,0.001321,0.044486,0.451783,0.474975,0.043165,0.023192
7,indian,0.001321,0.044486,0.455746,0.474975,0.043165,0.019229
8,catholic,0.003963,0.044486,0.384412,0.474975,0.040523,0.090563
9,queer,0.137384,0.044486,0.162483,0.474975,0.092899,0.312492


In [28]:
eval_scores['FPR - FPRt'].sum(), eval_scores['FNR - FNRt'].sum()


(3.7745708220929064, 4.139291658640871)

In [29]:
total_auc = roc_auc_score(df_test['Label'].astype(float), pred)
terms_auc = []
identity_terms = []
for identity_term in set(terms):
    term_data = df_test[df_test['Identity_Terms'] == identity_term].reset_index()
    data = df_test.sample(n=len(term_data['Text']), random_state=0)
    data = term_data.append(data, ignore_index=True)
    y_true, y_pred = data['Label'].astype(int), data['prediction_scores']

    try:
        term_auc = roc_auc_score(y_true, y_pred.round())
        terms_auc.append(term_auc)
        identity_terms.append(identity_term)
    except:
        print("Error in ",identity_term)


    
eval_scores = pd.DataFrame(identity_terms, columns = ['Identity_Titles'])
eval_scores['AUCt'] = terms_auc
eval_scores['AUC'] = total_auc
eval_scores['AUC - AUCt'] = abs(eval_scores['AUC'] - eval_scores['AUCt'])
eval_scores

Error in  nan


Unnamed: 0,Identity_Titles,AUCt,AUC,AUC - AUCt
0,asian,0.734879,0.74027,0.00539
1,black,0.780318,0.74027,0.040048
2,younger,0.729356,0.74027,0.010913
3,bisexual,0.690695,0.74027,0.049575
4,female,0.7563,0.74027,0.016031
5,muslim,0.751449,0.74027,0.011179
6,latinx,0.757935,0.74027,0.017666
7,indian,0.756961,0.74027,0.016691
8,catholic,0.773833,0.74027,0.033564
9,queer,0.794499,0.74027,0.054229


In [30]:
print(eval_scores['AUC - AUCt'].sum())

1.0345399490692861


In [31]:
EM = []
for i in tqdm_notebook(range(len(test_feature))):
    deprived_term = list(set(test_feature[i]).intersection(set(terms)))
    if sum([LR_pred[i], SVC_pred[i], DT_pred[i]]) == 3:
        EM.append(int(1))
    elif sum([LR_pred[i], SVC_pred[i], DT_pred[i]]) == 0:
        EM.append(int(0))
    elif len(deprived_term) >0:
        EM.append(int(0))
    elif len(deprived_term)==0:
        EM.append(pred[i])

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=76564.0), HTML(value='')))




In [32]:
xf_positive = 0
xd_positive = 0
xf_total = 0
xd_total = 0

for i in tqdm_notebook(range(len(test_feature))):
    if(EM[i] == 1 and len(list(set(test_feature[i]).intersection(set(terms)))) > 0):
        xd_positive += 1
        xd_total += 1
    elif(len(list(set(test_feature[i]).intersection(set(terms)))) > 0):
        xd_total += 1
    elif(EM[i] == 1 and len(list(set(test_feature[i]).intersection(set(terms))))== 0):
        xf_positive += 1
        xf_total += 1
    elif(len(list(set(test_feature[i]).intersection(set(terms))))== 0):
        xf_total += 1

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=76564.0), HTML(value='')))




In [33]:
pf = xf_positive / xf_total
pd = xd_positive / xd_total
discrimination = pf - pd
discrimination

0.1942603022696016

In [34]:
accuracy_score(df_test['Label'].astype(float), EM)

0.6273575048325584

In [35]:
#test_data['prediction_scores'] = ROC_pred
import pandas as pd
total_tn, total_fp, total_fn, total_tp = confusion_matrix(df_test['Label'].astype(float), EM).ravel()
total_fpr = total_fp / (total_fp + total_tn )
total_fnr = total_fn / (total_fn + total_tp)
false_positive = []
false_negative = []
identity_terms = []
for identity_term in set(terms):
    data = df_test[df_test['Identity_Terms'] == identity_term].reset_index()
    y_true, y_pred = data['Label'].astype(int), data['prediction_scores']
    tn, fp, fn, tp = perf_measure(y_true, y_pred.round())
    try:
        fpr = fp / (fp + tn)
        fnr = fn / (fn + tp)
        false_positive.append(fpr)
        false_negative.append(fnr)
        identity_terms.append(identity_term)
    except:
        print("Error in ", identity_term)

    
eval_scores = pd.DataFrame(identity_terms, columns = ['Identity_Titles'])
eval_scores['Identity_Term_False_Positive'] = false_positive
eval_scores['Total_False_Positive'] = total_fpr
eval_scores['Identity_Term_False_Negatives'] = false_negative
eval_scores['Total_False_Negative'] = total_fnr
eval_scores['FPR - FPRt'] = abs(total_fpr - eval_scores['Identity_Term_False_Positive'])
eval_scores['FNR - FNRt'] = abs(total_fnr - eval_scores['Identity_Term_False_Negatives'])
eval_scores

Error in  nan


Unnamed: 0,Identity_Titles,Identity_Term_False_Positive,Total_False_Positive,Identity_Term_False_Negatives,Total_False_Negative,FPR - FPRt,FNR - FNRt
0,asian,0.0,0.029727,0.546896,0.715558,0.029727,0.168663
1,black,0.044914,0.029727,0.31572,0.715558,0.015187,0.399838
2,younger,0.0,0.029727,0.569353,0.715558,0.029727,0.146206
3,bisexual,0.0,0.029727,0.726552,0.715558,0.029727,0.010994
4,female,0.002642,0.029727,0.457067,0.715558,0.027085,0.258491
5,muslim,0.0,0.029727,0.479524,0.715558,0.029727,0.236034
6,latinx,0.001321,0.029727,0.451783,0.715558,0.028406,0.263775
7,indian,0.001321,0.029727,0.455746,0.715558,0.028406,0.259812
8,catholic,0.003963,0.029727,0.384412,0.715558,0.025764,0.331146
9,queer,0.137384,0.029727,0.162483,0.715558,0.107658,0.553075


In [36]:
eval_scores['FPR - FPRt'].sum(), eval_scores['FNR - FNRt'].sum()

(3.198973935827821, 11.075945788388019)

In [37]:
total_auc = roc_auc_score(df_test['Label'].astype(float), EM)
terms_auc = []
identity_terms = []
for identity_term in set(terms):
    term_data = df_test[df_test['Identity_Terms'] == identity_term].reset_index()
    data = df_test.sample(n=len(term_data['Text']), random_state=0)
    data = term_data.append(data, ignore_index=True)
    y_true, y_pred = data['Label'].astype(int), data['prediction_scores']

    try:
        term_auc = roc_auc_score(y_true, y_pred.round())
        terms_auc.append(term_auc)
        identity_terms.append(identity_term)
    except:
        print("Error in ",identity_term)


    
eval_scores = pd.DataFrame(identity_terms, columns = ['Identity_Titles'])
eval_scores['AUCt'] = terms_auc
eval_scores['AUC'] = total_auc
eval_scores['AUC - AUCt'] = abs(eval_scores['AUC'] - eval_scores['AUCt'])
eval_scores

Error in  nan


Unnamed: 0,Identity_Titles,AUCt,AUC,AUC - AUCt
0,asian,0.734879,0.627358,0.107522
1,black,0.780318,0.627358,0.15296
2,younger,0.729356,0.627358,0.101999
3,bisexual,0.690695,0.627358,0.063337
4,female,0.7563,0.627358,0.128943
5,muslim,0.751449,0.627358,0.124091
6,latinx,0.757935,0.627358,0.130578
7,indian,0.756961,0.627358,0.129603
8,catholic,0.773833,0.627358,0.146476
9,queer,0.794499,0.627358,0.167141


In [38]:
print(eval_scores['AUC - AUCt'].sum())

5.308305750097208
