# Error analysis

## Load Pretrained model

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Checkpoint directories of the two hybrid models compared in this notebook.
saver_paths = ["./logs/abusive/hybrid/ckpt", "./logs/abusive/hybrid_larger_feature/ckpt"]
# One entry per directory, in the same order as saver_paths; each entry is
# whatever tf.train.get_checkpoint_state returns for that directory.
checkpoint_files = list(map(tf.train.get_checkpoint_state, saver_paths))

In [3]:
checkpoint_files

[model_checkpoint_path: "/home/homes/jhpark/hate-speech/logs/abusive/hybrid/ckpt/model-final.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/abusive/hybrid/ckpt/model-360000.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/abusive/hybrid/ckpt/model-370000.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/abusive/hybrid/ckpt/model-380000.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/abusive/hybrid/ckpt/model-390000.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/abusive/hybrid/ckpt/model-final.ckpt",
 model_checkpoint_path: "/home/homes/jhpark/hate-speech/logs/abusive/hybrid_larger_feature/ckpt/model-final.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/abusive/hybrid_larger_feature/ckpt/model-360000.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/abusive/hybrid_larger_feature/ckpt/model-370000.ckpt"
 all_model_checkpoint_

In [4]:
# Build the session *configuration* used for evaluation; the tf.Session itself
# is opened later, once per checkpoint, with `config=session_conf`.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
session_conf = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)


In [5]:
from data.hybrid import load_data_from_file

# Load the "abusive_valid_binary" split for the hybrid (word + char) models.
(x_train, y_train, x_test, y_test, initW, vocab) = load_data_from_file("abusive_valid_binary")
# Dimensions are read off the first training sample, which is indexed by
# "word" and "char" and exposes a `.shape` for each.
# NOTE(review): presumably shape[0] is the sequence length and shape[1] of the
# char input is the character vocabulary size - confirm against data.hybrid.
word_text_len = x_train[0]["word"].shape[0]
word_vocab_size = len(vocab.vocabulary_)
char_text_len = x_train[0]["char"].shape[0]
char_vocab_size = x_train[0]["char"].shape[1]



Data Summary:
Train: Total Positive Labels=4145 (0.3228)
Test: Total Positive Labels=889 (0.3228)

dataset passed the assertion test


In [6]:
from data.hybrid import extract_from_batch

# Split the test samples into their word-level and char-level batches.
batchW, batchC = extract_from_batch(x_test)

# Both hybrid checkpoints take the same inputs, so build one identical feed
# (as a separate dict object) and one output tensor name per checkpoint.
feed_dicts = [
    {
        "input/labels:0": y_test,
        "input/X_word:0": batchW,
        "input/X_char:0": batchC,
        "dropout_keep_prob:0": 1,
    }
    for _ in range(2)
]

output_names = ["output/prediction"] * 2

In [7]:
# Evaluate every checkpoint on its feed dict and collect the predictions in
# `preds`, in the same order as `checkpoint_files`.
preds = []
for i, ckpt in enumerate(checkpoint_files):
    # NOTE(review): the path printed here and the .meta file imported below both
    # come from all_model_checkpoint_paths[0] (the first listed checkpoint),
    # whereas the weights are restored from ckpt.model_checkpoint_path further
    # down. The printed path is therefore not the checkpoint whose weights are
    # evaluated - confirm this is intended.
    print(ckpt.all_model_checkpoint_paths[0])
    # Start from an empty default graph so the graphs of the models do not mix.
    tf.reset_default_graph()
    saver = tf.train.import_meta_graph("{}.meta".format(ckpt.all_model_checkpoint_paths[0]))

    with tf.Session(config=session_conf) as sess:
        saver.restore(sess, ckpt.model_checkpoint_path)
        # `graph` is assigned but not read anywhere in this cell.
        graph = tf.get_default_graph()
        # Tensors are addressed by name: "<op name>:0".
        pred = sess.run(output_names[i] + ":0", feed_dicts[i])
        print(pred.shape)
        preds.append(pred)

/home/homes/jhpark/hate-speech/logs/abusive/hybrid/ckpt/model-360000.ckpt
(2754,)
/home/homes/jhpark/hate-speech/logs/abusive/hybrid_larger_feature/ckpt/model-360000.ckpt
(2754,)


In [8]:
from model.helper import calculate_metrics
for pred in preds:
    precision, recall, f1 = calculate_metrics(y_test, pred)
    print("precision=%.4f recall=%.4f f1=%.4f" % (precision, recall, f1))

precision=0.7592 recall=0.6985 f1=0.7276
precision=0.7346 recall=0.7255 f1=0.7301


Using TensorFlow backend.


Since the metrics are the same as the final output, we can confirm that the pre-trained models have been loaded successfully.

## Loading Baseline

In [9]:
from data.preprocess import load_from_file
data = load_from_file("abusive_valid_binary")

In [10]:
data["x_train"][:10]

array(['you have more legal rights than i do, is that ok?',
       'most women are bad drivers just like most men are douchebag drivers equality notsexist fact latraffic',
       'i think anthony is anti-feminazi and so am i',
       "and that's why they invented the term feminazi...",
       'kat is the biggest bitch in mkr history. mkr2015 mkr cow katthebitch',
       "male thor is unworthy of mjolnir so a woman is taking place. i'm not sexist but that is bull sheeeeeit",
       'hashtag killerblondes is the dickiest hashtag in all mkr history... seriously... why not butcherthemodels',
       "the world is enriched by your wisdom. i'm not sexist but i make sexist comments because it makes me lol.",
       '.@brittany_blade barb sheehan never says a word about being abused for 20 years. kills husband. claims abuse w no proof. walks. fems cheer.',
       "pro and anti blameonenotall promoting nothing but division. those of you that think you're facilitating equality, you're most certai

In [11]:
import numpy as np
from sklearn.linear_model import LinearRegression
from data.preprocess import load_from_file
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

In [12]:
def evaluate(pred_scores, target, label="Training"):
    """Binarise scores at 0.5, print precision/recall/F1 and return the F1.

    Args:
        pred_scores: iterable of numeric scores; a score >= 0.5 is mapped to
            the positive class (1), anything lower to 0.
        target: ground-truth labels, passed unchanged to ``calculate_metrics``.
        label: prefix of the printed line. It defaults to "Training" so
            existing calls print exactly what they printed before; pass e.g.
            "Test" when scoring held-out data so the output is not mislabelled.

    Returns:
        The F1 value returned by ``calculate_metrics``.
    """
    pred = [1 if score >= 0.5 else 0 for score in pred_scores]
    precision, recall, f1 = calculate_metrics(target, pred)
    print("%s: Precision=%.2f Recall=%.2f, F1=%.2f" % (label, precision, recall, f1))
    return f1

In [13]:
def lr_with_freq(data):
    """Fit the character n-gram TF-IDF baseline and score it on train and test.

    Args:
        data: mapping with the keys "x_train", "y_train", "x_test" and "y_test".

    Returns:
        A ``(model, pred_scores)`` pair: the fitted regressor and its raw
        (un-thresholded) scores on ``data["x_test"]``.
    """
    # Character n-grams of length 2 to 5, counted on the training texts.
    vectorizer = CountVectorizer(analyzer="char", ngram_range=(2, 5))
    train_counts = vectorizer.fit_transform(data["x_train"])
    print(train_counts.shape)

    tfidf = TfidfTransformer()
    train_features = tfidf.fit_transform(train_counts)

    # NOTE(review): LinearRegression is an ordinary least-squares regressor;
    # its outputs are thresholded at 0.5 by `evaluate`. If a classifier such as
    # LogisticRegression was intended, the recorded metrics would change.
    lr = LinearRegression()
    lr.fit(train_features, data["y_train"])
    train_scores = lr.predict(train_features)
    evaluate(train_scores, data["y_train"])

    # The test texts reuse the vocabulary and IDF weights fitted on the training set.
    test_features = tfidf.transform(vectorizer.transform(data["x_test"]))
    pred_scores = lr.predict(test_features)
    evaluate(pred_scores, data["y_test"])
    return lr, pred_scores

In [14]:
_, baseline_pred = lr_with_freq(data)

(12842, 204654)
Training: Precision=0.99 Recall=0.99, F1=0.99
Training: Precision=0.70 Recall=0.67, F1=0.68


In [15]:
# Threshold the baseline's raw scores at 0.5 to obtain hard 0/1 labels.
baseline_pred = [1 if score >= 0.5 else 0 for score in baseline_pred]

In [16]:
len(baseline_pred)

2754

## Error analysis

In [17]:
def get_false_positive(true, pred, x):
    """Collect the inputs predicted as 1 although their true label is 0.

    Args:
        true: ground-truth labels, indexable by position.
        pred: predicted labels, same length as ``true``.
        x: the inputs the labels belong to, same length as ``true``.

    Returns:
        ``(count, samples)`` where ``samples`` lists the matching entries of
        ``x`` in their original order and ``count`` is ``len(samples)``.

    Raises:
        AssertionError: if the three sequences differ in length.
    """
    assert len(true) == len(pred)
    assert len(true) == len(x)
    false_positives = [
        x[pos]
        for pos in range(len(true))
        if true[pos] == 0 and pred[pos] == 1
    ]
    return len(false_positives), false_positives

In [18]:
def get_false_negative(true, pred, x):
    """Collect the inputs predicted as 0 although their true label is 1.

    Args:
        true: ground-truth labels, indexable by position.
        pred: predicted labels, same length as ``true``.
        x: the inputs the labels belong to, same length as ``true``.

    Returns:
        ``(count, samples)`` where ``samples`` lists the matching entries of
        ``x`` in their original order and ``count`` is ``len(samples)``.

    Raises:
        AssertionError: if the three sequences differ in length.
    """
    assert len(true) == len(pred)
    assert len(true) == len(x)
    false_negatives = [
        x[pos]
        for pos in range(len(true))
        if true[pos] == 1 and pred[pos] == 0
    ]
    return len(false_negatives), false_negatives

### Hybrid

In [19]:
hybrid_pred = preds[0]
len(hybrid_pred)

2754

In [20]:
get_false_positive(data["y_test"], hybrid_pred, data["x_test"])

(197,
 ["nothin unsexier than some1 that think they're all that. male or female “@the_eccles: awww, bless them, they still think they were hot..#mkr”",
  'your tweet said "call me sexist," followed by sexist statement. i comply, u take issue. men are so illogical and emotional!',
  'and remember that israel has only existed for a fraction of the nobel period.',
  '3 male indie filmmakers at the table next to me discussing a script for a new movie. it sounds sexist! this is y we need more women in media',
  'these 2 delusional, narcissistic hostesses on mkr make me want to vomit my own dinner up.',
  'by people, you mean men only. the women are clearly imprisoned in their homes.',
  'the translations are on a muslim web site and they say strike.',
  'my understanding is free speech means there should never be any consequences when you say revolting things so, check…',
  'really!!! really!!! another round!! mkr those butcher promo girls better be nice to sheri and emilie.',
  "the inform

In [21]:
get_false_negative(data["y_test"], hybrid_pred, data["x_test"])

(268,
 ["eva &amp; debra are now my most hated team on mkr after least night's ep. snakes in the grass those two.",
  'stoprush: "uses words like chickified &amp; feminazi, is a hate-monger." how do you actually function in this worl…',
  'that face kat just pulled was hideous. mkr',
  "ben i'm open to meeting good feminists like helen, etc. but uh...how can i say nicely, maddy is...not smart...at all. a lemming.",
  'and the secret twist. kat and andre are eliminated for being cunts!!!! mkr',
  'mkr kat: the wicked witch from the west',
  'personally i am just glad when they use the actual word and not feminazi',
  ".@facerealitynow i saw/rt'ed it. too funny. i saw this one today...",
  "at least we know they're natural blondes mkr",
  "yes i am a brony and a meninst get over it. you won't have sex with me? what kind of bullcrap is this oppression? femi…",
  'how can there be a biological inequality? that suggests that the process of evolution itself is s…',
  "i see that now...didn't

### Hybrid Larger Feature

In [22]:
hybrid_pred_large = preds[1]
len(hybrid_pred_large)

2754

In [23]:
get_false_positive(data["y_test"], hybrid_pred_large, data["x_test"])

(233,
 ["nothin unsexier than some1 that think they're all that. male or female “@the_eccles: awww, bless them, they still think they were hot..#mkr”",
  'your tweet said "call me sexist," followed by sexist statement. i comply, u take issue. men are so illogical and emotional!',
  'and remember that israel has only existed for a fraction of the nobel period.',
  '3 male indie filmmakers at the table next to me discussing a script for a new movie. it sounds sexist! this is y we need more women in media',
  'these 2 delusional, narcissistic hostesses on mkr make me want to vomit my own dinner up.',
  'by people, you mean men only. the women are clearly imprisoned in their homes.',
  "israel doesn't control syria. that is simply an unrealistic conspiracy theory.",
  'the translations are on a muslim web site and they say strike.',
  'wow, must be 5 or 6 of them.',
  'bugger...not... bye bye blondes... mkr',
  "blameonenotall is an epistemically violent hashtag, meant to minimize rape cul

In [24]:
get_false_negative(data["y_test"], hybrid_pred_large, data["x_test"])

(244,
 ["eva &amp; debra are now my most hated team on mkr after least night's ep. snakes in the grass those two.",
  'stoprush: "uses words like chickified &amp; feminazi, is a hate-monger." how do you actually function in this worl…',
  'a well done, one...must say. fem i was arguing w/ had rt me a few times..so i thought this was her cavalry',
  "ben i'm open to meeting good feminists like helen, etc. but uh...how can i say nicely, maddy is...not smart...at all. a lemming.",
  'and the secret twist. kat and andre are eliminated for being cunts!!!! mkr',
  'personally i am just glad when they use the actual word and not feminazi',
  ".@facerealitynow i saw/rt'ed it. too funny. i saw this one today...",
  "at least we know they're natural blondes mkr",
  "yes i am a brony and a meninst get over it. you won't have sex with me? what kind of bullcrap is this oppression? femi…",
  'how can there be a biological inequality? that suggests that the process of evolution itself is s…',
  "i se

## Comparison

In [25]:
# Positions that the hybrid model gets right as positives ...
true_positive_idx = [
    i
    for i in range(len(hybrid_pred))
    if hybrid_pred[i] == 1 and y_test[i] == 1
]
total_true_positive = len(true_positive_idx)
# ... and how many of those the larger-feature model also predicts as positive.
count = sum(1 for i in true_positive_idx if hybrid_pred_large[i] == 1)
    
                             

In [26]:
count/total_true_positive

0.9420289855072463

In [27]:
# Positions that the hybrid model gets right as negatives ...
true_negative_idx = [
    i
    for i in range(len(hybrid_pred))
    if hybrid_pred[i] == 0 and y_test[i] == 0
]
total_true_negative = len(true_negative_idx)
# ... and how many of those the larger-feature model also predicts as negative.
count = sum(1 for i in true_negative_idx if hybrid_pred_large[i] == 0)
    
                             

In [28]:
count/total_true_negative

0.9514388489208633

## Final evaluation

In [29]:
final_pred = np.zeros(len(y_test))
len(final_pred)

2754

In [30]:
# Test-set positions the larger-feature hybrid model flags as abusive (1),
# together with the corresponding raw tweets.
pred_positives_idx = [
    i for i in range(len(final_pred)) if hybrid_pred_large[i] == 1
]
pred_positives = [data["x_test"][i] for i in pred_positives_idx]
len(pred_positives_idx)

878

In [31]:
# Checkpoint directory of the word-level model used for the second stage.
# Note: this rebinds `saver_paths` and `checkpoint_files`, which held the two
# hybrid checkpoints until this cell runs.
saver_paths = ["./logs/combined/word/ckpt"]
checkpoint_files = list(map(tf.train.get_checkpoint_state, saver_paths))

In [32]:
checkpoint_files

[model_checkpoint_path: "/home/homes/jhpark/hate-speech/logs/combined/word/ckpt/model-final.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/combined/word/ckpt/model-360000.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/combined/word/ckpt/model-370000.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/combined/word/ckpt/model-380000.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/combined/word/ckpt/model-390000.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/combined/word/ckpt/model-final.ckpt"]

In [33]:
from data.hybrid import load_data_from_file

# Load the "combined_valid_binary" split. The underscore-prefixed names keep
# it apart from the "abusive_valid_binary" variables loaded earlier.
(_x_train, _y_train, _x_test, _y_test, _initW, _vocab) = load_data_from_file("combined_valid_binary")
# Dimensions are read off the first training sample and the fitted vocabulary.
_word_text_len = _x_train[0]["word"].shape[0]
_word_vocab_size = len(_vocab.vocabulary_)
print(_word_text_len)
print(_word_vocab_size)


Data Summary:
Train: Total Positive Labels=1441 (0.3476)
Test: Total Positive Labels=309 (0.3476)

dataset passed the assertion test
31
2880


In [51]:
from data.word import fit_input_into_vocab
# Encode every tweet flagged as abusive with the combined-dataset vocabulary.
_word = np.array([fit_input_into_vocab(tweet, vocab=_vocab) for tweet in pred_positives])

Tokenizing tweets with tokenize_for_dictionary

['or', 'you', 'ending', 'your', 'feminazi', 'reign', 'on', 'this', 'earth']
changed data 48 into (1, 31)
Tokenizing tweets with tokenize_for_dictionary

['james', 'are', 'you', 'being', 'a', 'pc', 'feminazi']
changed data 37 into (1, 31)
Tokenizing tweets with tokenize_for_dictionary

['yeah', 'would', 'never', 'im', 'not', 'sexist', 'but', 'women', 'please', 'just', 'stop', 'tweeting', 'about', 'football', 'we', "don't", 'tweet', 'about', 'cooking']
changed data 113 into (1, 31)
Tokenizing tweets with tokenize_for_dictionary

['i', 'believe', 'in', 'equal', 'rights', 'there', 'for', 'am', 'not', 'sexist', 'but', "women's", 'reaction', 'times', 'general', 'awareness', '2', 'whats', 'going', 'on', 'whilst', 'd']
changed data 127 into (1, 31)
Tokenizing tweets with tokenize_for_dictionary

['that', 'face', 'kat', 'just', 'pulled', 'was', 'hideous', 'mkr']
changed data 42 into (1, 31)
Tokenizing tweets with tokenize_for_dictionary

['im', 'n

In [58]:
# Single feed dict for the word-level model.
# - labels: an all-zero array of shape (len(_word), 1).
#   NOTE(review): presumably a dummy value only needed to satisfy the labels
#   placeholder - confirm the prediction op does not depend on it.
# - X: the encoded tweets reshaped to (len(pred_positives), _word_text_len).
# - dropout keep probability 1.
_feed_dicts = []
_feed_dicts.append({"input/labels:0": np.zeros(len(_word)).reshape(len(_word), 1), "input/X:0":_word.reshape((len(pred_positives), _word_text_len)) , "dropout_keep_prob:0": 1})

In [59]:
output_names = ["output/prediction"]

In [60]:
# Run the word-level checkpoint on the tweets flagged as abusive.
# Note: this rebinds `preds`; the hybrid predictions were already copied into
# `hybrid_pred` and `hybrid_pred_large` before this cell.
preds = []
for i, ckpt in enumerate(checkpoint_files):
    # NOTE(review): the path printed here and the .meta file imported below both
    # come from all_model_checkpoint_paths[0] (the first listed checkpoint),
    # whereas the weights are restored from ckpt.model_checkpoint_path further
    # down. The printed path is therefore not the checkpoint whose weights are
    # evaluated - confirm this is intended.
    print(ckpt.all_model_checkpoint_paths[0])
    # Start from an empty default graph before importing this model's graph.
    tf.reset_default_graph()
    saver = tf.train.import_meta_graph("{}.meta".format(ckpt.all_model_checkpoint_paths[0]))

    with tf.Session(config=session_conf) as sess:
        saver.restore(sess, ckpt.model_checkpoint_path)
        # `graph` is assigned but not read anywhere in this cell.
        graph = tf.get_default_graph()
        # Tensors are addressed by name: "<op name>:0".
        pred = sess.run(output_names[i] + ":0", _feed_dicts[i])
        print(pred.shape)
        preds.append(pred)

/home/homes/jhpark/hate-speech/logs/combined/word/ckpt/model-360000.ckpt
(878,)


In [61]:
# Final predictions: every tweet flagged as abusive is relabelled from the
# word-level model's output - 0 becomes sexism (1), anything else racism (2).
# Positions that were never flagged keep their initial value in final_pred.
for i in range(len(pred_positives)):
    final_pred[pred_positives_idx[i]] = 1 if preds[0][i] == 0 else 2

In [98]:
# get real labels
from data.preprocess import load_from_file

# Load the two binary datasets from which the three-way labels are rebuilt.
data_sexism = load_from_file("sexism_binary")
data_racism = load_from_file("racism_binary")

# Accumulators that put_into_abusive fills in place:
# texts, binary labels, and three-way labels (0 / 1 / 2).
x_abusive_valid = []
y_abusive_valid = []
real_y_abusive_valid = []

def put_into_abusive(x_sex, y_sex, x_race, y_race, x, y, _y=None):
    """Append the combined abusive samples to ``x``, ``y`` and ``_y`` in place.

    From the sexism split only the samples labelled 1 are taken; from the
    racism split every sample is taken.

    Args:
        x_sex, y_sex: texts and binary labels of the sexism split (same length).
        x_race, y_race: texts and binary labels of the racism split (same length).
        x: list that receives the selected texts.
        y: list that receives the matching binary labels.
        _y: optional list that receives three-way labels: 1 for a sexism
            positive, 2 for a racism positive, 0 for a racism-split negative.
            When omitted, a throw-away list is used and those labels are
            discarded, because the function returns nothing.

    Returns:
        None. All results are delivered by mutating ``x``, ``y`` and ``_y``.

    Raises:
        AssertionError: if texts and labels of a split differ in length.
    """
    assert len(x_sex) == len(y_sex)
    assert len(x_race) == len(y_race)
    # Identity check: `_y == None` would dispatch to the argument's own __eq__,
    # which is not a reliable "was it omitted?" test for arbitrary containers.
    if _y is None:
        _y = []
    for i in range(len(x_sex)):
        if y_sex[i] == 1:
            x.append(x_sex[i])
            y.append(y_sex[i])
            _y.append(1)
    for i in range(len(x_race)):
        x.append(x_race[i])
        y.append(y_race[i])
        _y.append(2 if y_race[i] == 1 else 0)
put_into_abusive(data_sexism["x_valid"], data_sexism["y_valid"], data_racism["x_valid"], data_racism["y_valid"], x_abusive_valid, y_abusive_valid, _y=real_y_abusive_valid)

In [108]:
np.all(y_abusive_valid == data["y_test"])

True

In [109]:
np.all(x_abusive_valid == data["x_test"])

True

In [110]:
len(list(filter(lambda x: x ==0, final_pred)))

1876

In [111]:
len(list(filter(lambda x: x ==0, real_y_abusive_valid)))

1865

In [112]:
len(list(filter(lambda x: x == 1, final_pred)))

539

In [113]:
len(list(filter(lambda x: x == 1, real_y_abusive_valid)))

580

In [114]:
len(list(filter(lambda x: x == 2, final_pred)))

339

In [116]:
len(list(filter(lambda x: x == 2, real_y_abusive_valid)))

309

In [132]:
def _evaluate(true, pred, _class):
    tp = 0
    fp = 0
    fn = 0
    for i in range(len(true)):
        if true[i] == pred[i] and true[i] == _class:
            tp += 1
        elif pred[i] != _class and true[i] == _class:
            fn += 1
        elif pred[i] == _class and true[i] != _class:
            fp += 1
    return tp / (tp + fp), tp / (tp + fn), tp, fp

def _accuracy(true, pred):
    count = 0
    for i in range(len(true)):
        if true[i] == pred[i]:
            count += 1
    return count / len(true)

In [133]:
_evaluate(real_y_abusive_valid, final_pred, 1) #sexism

(0.5287569573283859, 0.49137931034482757, 285, 254)

In [130]:
_evaluate(real_y_abusive_valid, final_pred, 2) # racism

(0.4306784660766962, 0.47249190938511326, 146)

In [131]:
_evaluate(real_y_abusive_valid, final_pred, 0) # none

(0.8699360341151386, 0.8750670241286863, 1632)

In [124]:
_accuracy(real_y_abusive_valid, final_pred)

0.7490922294843864