# Error analysis

## Load Pretrained model

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Checkpoint directories, ordered char -> word -> hybrid. Later cells rely on
# this order when they index `preds`, `feed_dicts` and `output_names`.
saver_paths = [
    "./logs/sf2/char/ckpt",
    "./logs/sf2/word/ckpt",
    "./logs/sf2/hybrid/ckpt",
]
# One checkpoint-state record per directory.
checkpoint_files = [tf.train.get_checkpoint_state(ckpt_dir) for ckpt_dir in saver_paths]

In [3]:
checkpoint_files

[model_checkpoint_path: "/home/homes/jhpark/hate-speech/logs/sf2/char/ckpt/model-final.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/sf2/char/ckpt/model-final.ckpt",
 model_checkpoint_path: "/home/homes/jhpark/hate-speech/logs/sf2/word/ckpt/model-final.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/sf2/word/ckpt/model-final.ckpt",
 model_checkpoint_path: "/home/homes/jhpark/hate-speech/logs/sf2/hybrid/ckpt/model-final.ckpt"
 all_model_checkpoint_paths: "/home/homes/jhpark/hate-speech/logs/sf2/hybrid/ckpt/model-final.ckpt"]

In [4]:
# Session configuration shared by every model evaluated below.
# The process is limited to 90% of GPU memory.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
session_conf = tf.ConfigProto(
    gpu_options=gpu_options,
    allow_soft_placement=True,
)


In [5]:
from data.hybrid import load_data_from_file

# Load the train/test split plus `initW` and `vocab` as returned by the project loader.
x_train, y_train, x_test, y_test, initW, vocab = load_data_from_file("sexism_final2_binary")

# Input dimensions are read off the first training sample.
first_word_input = x_train[0]["word"]
first_char_input = x_train[0]["char"]

word_text_len = first_word_input.shape[0]
word_vocab_size = len(vocab.vocabulary_)
char_text_len, char_vocab_size = first_char_input.shape[0], first_char_input.shape[1]



Data Summary:
Train: Total Positive Labels=3284 (0.2372)
Test: Total Positive Labels=580 (0.2371)

dataset passed the assertion test


In [6]:
from data.hybrid import extract_from_batch

# Split the test set into its word-level and char-level inputs.
batchW, batchC = extract_from_batch(x_test)

# One feed dict per model, in checkpoint order (char, word, hybrid).
# Keys are tensor names inside each restored graph. The dropout keep
# probability is fed as 1 and the Keras learning phase as 0.
feed_dicts = [
    {
        "input/Placeholder_1:0": y_test,
        "input/Placeholder:0": batchC,
        "nn-layers/fully-connected-layer-0/dropout_1/keras_learning_phase:0": 0,
    },
    {
        "input/labels:0": y_test,
        "input/X:0": batchW,
        "dropout_keep_prob:0": 1,
    },
    {
        "input/labels:0": y_test,
        "input/X_word:0": batchW,
        "input/X_char:0": batchC,
        "dropout_keep_prob:0": 1,
    },
]

# Prediction op name for each model; ":0" is appended when the tensor is fetched.
output_names = ["prediction/prediction", "output/prediction", "output/prediction"]

In [7]:
# Restore each pretrained model in turn and collect its test-set predictions.
# `preds` ends up ordered like `checkpoint_files` (char, word, hybrid).
preds = []
for i, ckpt in enumerate(checkpoint_files):
    # get_checkpoint_state returns None when the directory has no checkpoint
    # state; fail with a clear message instead of an AttributeError below.
    if ckpt is None:
        raise FileNotFoundError("no checkpoint state found in {}".format(saver_paths[i]))

    # Use one path for both the graph definition and the weights, so the
    # imported .meta file always belongs to the checkpoint being restored.
    ckpt_path = ckpt.model_checkpoint_path
    print(ckpt_path)

    # Each model has its own graph; start from an empty default graph.
    tf.reset_default_graph()
    saver = tf.train.import_meta_graph("{}.meta".format(ckpt_path))

    with tf.Session(config=session_conf) as sess:
        saver.restore(sess, ckpt_path)
        graph = tf.get_default_graph()
        # Fetch the first output of the model's prediction op.
        pred = sess.run(output_names[i] + ":0", feed_dicts[i])
        print(pred.shape)
        preds.append(pred)

/home/homes/jhpark/hate-speech/logs/sf2/char/ckpt/model-final.ckpt
(2446,)
/home/homes/jhpark/hate-speech/logs/sf2/word/ckpt/model-final.ckpt
(2446,)
/home/homes/jhpark/hate-speech/logs/sf2/hybrid/ckpt/model-final.ckpt
(2446,)


In [8]:
from model.helper import calculate_metrics
# Precision / recall / F1 of each restored model against the test labels,
# printed in the order of `preds`.
for model_pred in preds:
    scores = calculate_metrics(y_test, model_pred)
    precision, recall, f1 = scores
    print("precision=%.4f recall=%.4f f1=%.4f" % (precision, recall, f1))

Using TensorFlow backend.


precision=0.6703 recall=0.6448 f1=0.6573
precision=0.6938 recall=0.6603 f1=0.6767
precision=0.6892 recall=0.6845 f1=0.6869


Since the metrics are the same as the final output, we can confirm that the pre-trained models have been loaded successfully.

## Load original test data

In [9]:
from data.preprocess import load_from_file
data = load_from_file("sexism_final2_binary")

In [10]:
data["x_test"][:10]

array(['time to cheer on twinners on mkr! learn more about them here:',
       'annie and lloyd wonder why no one is lining up.. who the eff eats meatballs for breakfast.. mkr',
       '"@nasalasercomm: what do lasercomm and thedress have in common? nasa\'s getting in to it too!',
       "i've always wanted one of those huge english hats. the kind you see royalty wearing.",
       'their house situation is so weird tho. i am not for walking across a park to get to the front door! mkr',
       'lol which salty reject started blameonenotall', 'fuck. yes.',
       'while i\'m in rant mode can i also address the term "feminazi"?',
       "it's free food. seriously, they don't care mkr",
       'rape is not a rhetorical device. rape is not a rhetorical device. rape is not a rhetorical device.'], 
      dtype='<U147')

In [11]:
from data.char import one_hot_to_chars
["".join(one_hot_to_chars(x)) for x in batchC[:10]]

['timetocheerontwinnersonmkr!learnmoreaboutthemhere:',
 'annieandlloydwonderwhynooneisliningup..whotheeffeatsmeatballsforbreakfast..mkr',
 '@nasalasercomm:whatdolasercommandthedresshaveincommon?nasasgettingintoittoo!',
 'ivealwayswantedoneofthosehugeenglishhats.thekindyouseeroyaltywearing.',
 'theirhousesituationissoweirdtho.iamnotforwalkingacrossaparktogettothefrontdoor!mkr',
 'lolwhichsaltyrejectstartedblameonenotall',
 'fuck.yes.',
 'whileiminrantmodecanialsoaddressthetermfeminazi?',
 'itsfreefood.seriously,theydontcaremkr',
 'rapeisnotarhetoricaldevice.rapeisnotarhetoricaldevice.rapeisnotarhetoricaldevice.']

## Loading Baseline

In [15]:
import numpy as np
from sklearn.linear_model import LinearRegression
from data.preprocess import load_from_file
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

In [16]:
def evaluate(pred_scores, target, label="Training"):
    """Binarize scores at 0.5, print precision/recall/F1 and return the F1.

    Args:
        pred_scores: iterable of real-valued scores; a score >= 0.5 becomes
            class 1, anything else class 0.
        target: ground-truth labels, passed unchanged to `calculate_metrics`.
        label: prefix of the printed line. Defaults to "Training" so existing
            calls print exactly what they printed before; pass e.g. "Test"
            when scoring held-out data.

    Returns:
        The F1 value reported by `calculate_metrics`.
    """
    pred = [1 if score >= 0.5 else 0 for score in pred_scores]
    precision, recall, f1 = calculate_metrics(target, pred)
    print("%s: Precision=%.2f Recall=%.2f, F1=%.2f" % (label, precision, recall, f1))
    return f1

In [17]:
def lr_with_freq(data):
    """Fit and score the character n-gram TF-IDF + linear-regression baseline.

    Args:
        data: mapping with "x_train", "y_train", "x_test" and "y_test" entries.

    Returns:
        (lr, pred_scores): the fitted regressor and its raw (un-thresholded)
        scores on data["x_test"].

    Prints the shape of the training count matrix, then one metrics line for
    the training data and one for the test data (both via `evaluate`).
    """
    train_texts, train_labels = data["x_train"], data["y_train"]

    # Character 2- to 5-gram counts, re-weighted by TF-IDF.
    ngram_counter = CountVectorizer(ngram_range=(2,5), analyzer="char")
    train_counts = ngram_counter.fit_transform(train_texts)
    print(train_counts.shape)

    tfidf = TfidfTransformer()
    train_features = tfidf.fit_transform(train_counts)

    # NOTE(review): this is LinearRegression, with outputs thresholded at 0.5
    # inside `evaluate`; confirm LogisticRegression was not intended.
    lr = LinearRegression().fit(train_features, train_labels)
    evaluate(lr.predict(train_features), train_labels)

    # Test data goes through the already-fitted vectorizer and transformer.
    test_features = tfidf.transform(ngram_counter.transform(data["x_test"]))
    pred_scores = lr.predict(test_features)
    evaluate(pred_scores, data["y_test"])
    return lr, pred_scores

In [18]:
_, baseline_pred = lr_with_freq(data)

(13846, 214687)
Training: Precision=0.98 Recall=0.98, F1=0.98
Training: Precision=0.70 Recall=0.63, F1=0.66


In [19]:
# Turn the baseline's raw scores into 0/1 labels using the same 0.5 cut-off as `evaluate`.
baseline_pred = [1 if score >= 0.5 else 0 for score in baseline_pred]

In [20]:
len(baseline_pred)

2446

## Baseline vs CNN

In [42]:
# char_pred / word_pred are only assigned further down (section "Char vs Word"),
# so bind them from `preds` here to keep this cell runnable top-to-bottom.
char_pred, word_pred = preds[0], preds[1]

# Positive test tweets on which the baseline and the char CNN agree with each
# other but disagree with the word CNN.
both_word_right = []  # baseline and char both miss it, word catches it
both_word_wrong = []  # baseline and char both catch it, word misses it
for i in range(len(baseline_pred)):
    if baseline_pred[i] == char_pred[i] and char_pred[i] == 0 and word_pred[i] == 1 and y_test[i] == 1:
        both_word_right.append(data["x_test"][i])
    # The baseline check was missing here, which made this list a plain
    # char-vs-word comparison despite its "both" name.
    if baseline_pred[i] == char_pred[i] and char_pred[i] == 1 and word_pred[i] == 0 and y_test[i] == 1:
        both_word_wrong.append(data["x_test"][i])
    
                             

In [43]:
both_word_right

['do men pretend to give birth? no? then women should stop pretending to make a difference in politics.',
 'so what you are saying is, only women get raped? never knew rape was a culture either. good to know.',
 ".@notchrissmith free market. they get what they negotiate. producers feel it's fair or not. the actor/actress can accept or not. askhermore",
 'certain threads: 👦 tech isn’t sexist 👧 yes it is 👦 no it isn’t 👧👧 yes it is 👦 no it isn’t 👧👧👧 yes it is 👦 no it isn’t 👧👧👧👧 …',
 'katie and nikki go down. maybe not well. but down none the less. mkr',
 "katieandnikki stop calling yourselves pretty and hot..you're not and saying it a million times doesn't make you either...stfu mkr",
 "i know it's hard for a jihadi bitch to understand but reading comprehension is all that is required.",
 'i see you use social media. i want all social media sites to go down for a week just to see how the girls react. notsexist',
 "sexist is when people don't go because it's women. people aren't going beca

In [19]:
# char_pred / word_pred are only assigned further down (section "Char vs Word"),
# so bind them from `preds` here to keep this cell runnable top-to-bottom.
char_pred, word_pred = preds[0], preds[1]

# Tweets the word CNN labels negative while the char CNN labels positive.
word_right_neg = []  # true label 0: the word model's negative is correct
word_wrong_neg = []  # true label 1 and the baseline also says 1: the word model's negative is wrong
for i in range(len(baseline_pred)):
    # Was "ifchar_pred[i]", a syntax error.
    if char_pred[i] == 1 and word_pred[i] == 0 and y_test[i] == 0:
        word_right_neg.append(data["x_test"][i])
    if baseline_pred[i] == 1 and char_pred[i] == 1 and word_pred[i] == 0  and y_test[i] == 1:
        word_wrong_neg.append(data["x_test"][i])

In [20]:
word_right_neg

['i know saudis chased girls into a burning building.',
 'islam mt germany - the traditional rosemonday carnival parade in duesseldorf afp patrik stollarz',
 'mkr 17....ok that is bullshit and unfair. there have been a lot of worst dishes than what they served.',
 "blameonenotall because men's feelings about how women discuss sexism matter more than the abuses women deal with",
 'and the dress is only symbolic of the broader sexism. islam',
 'i want the blondes to meet the frizzy loud girl in the other group mkr',
 'oh bless. little lines. sand. micro herbs. bless bless bless. mkr',
 "wait, so some guys can't relate to female protagonists but have no problem embodying murderous, sociopathic men?",
 "this. blocklist doesn't look at all at what you say...",
 "if you hadn't noticed, i save my witty replies for sexists with a little more panache. i don't want to get above your reading level",
 "or hit her with the so called bread she made. that'd knock her out. mkr",
 'remember that time j

In [21]:
word_wrong_neg

['the feminazi pigs will always run into a battering ram and pulpify, because this is an environment never designed for them to win in.',
 ".@mt8_9 fathers who can't pay child support is the only noncombatant group who can be …",
 'the amount of times i wonder where people got their licenses is directly proportional to the number of female drivers o…',
 'you should be able to save from here...right click or hold down on them and you should get a save option.',
 '...as a result there is a possible supply of 100% of the population for all "female" jobs vs. maybe 20% for some "male" jobs.',
 "have you ever had a female judge outraged at you for not paying child support for someone else's",
 'your cooking is still shit kat mkr',
 'honestly. some of the rougher female heads on tv are compiled right here mkr',
 'yeah. my eye opener was a pay gap article. i know a little about econ...tried to explain why it was math, not sexism. but...',
 'adviceforyoungfeminists isis is killing people, our s

### Char vs Word

In [9]:
# Name the per-model predictions; `preds` is ordered char, word, hybrid.
char_pred, word_pred, hybrid_pred = preds[0], preds[1], preds[2]


In [19]:
# Positive test tweets on which the char and word CNNs disagree.
sample_ids = range(len(baseline_pred))

# Word CNN predicts 1 (correct), char CNN predicts 0.
word_right = [
    data["x_test"][i]
    for i in sample_ids
    if char_pred[i] == 0 and word_pred[i] == 1 and y_test[i] == 1
]

# Word CNN predicts 0 (wrong), char CNN predicts 1.
word_wrong = [
    data["x_test"][i]
    for i in sample_ids
    if char_pred[i] == 1 and word_pred[i] == 0 and y_test[i] == 1
]
    
                             

In [20]:
len(word_right)

47

In [21]:
len(word_wrong)

38

In [22]:
word_right

['do men pretend to give birth? no? then women should stop pretending to make a difference in politics.',
 'so what you are saying is, only women get raped? never knew rape was a culture either. good to know.',
 ".@notchrissmith free market. they get what they negotiate. producers feel it's fair or not. the actor/actress can accept or not. askhermore",
 'go hot girls!!! mkr',
 'certain threads: 👦 tech isn’t sexist 👧 yes it is 👦 no it isn’t 👧👧 yes it is 👦 no it isn’t 👧👧👧 yes it is 👦 no it isn’t 👧👧👧👧 …',
 "the baby doesn't have a dad in her life and her mom is a whore. this is a future stripper of america in the making",
 "i'm gonna gouge my eyes out if this bitch doesn't get off mkr catandandre",
 'katie and nikki go down. maybe not well. but down none the less. mkr',
 "katieandnikki stop calling yourselves pretty and hot..you're not and saying it a million times doesn't make you either...stfu mkr",
 "i know it's hard for a jihadi bitch to understand but reading comprehension is all tha

In [23]:
word_wrong

['oh...like the new name, btw..."punching down" another inane tool they use to silence dissent...and facts...and logic.',
 "omg kat! calling them suckers? i can't wait to see what they said about your slop... who's the suckers now? ha-ha!!! mkr...",
 'you know kat and andre are awful when i want katie and nikki to go through mkr',
 "...a mf'ing state of the union invite.",
 'the feminazi pigs will always run into a battering ram and pulpify, because this is an environment never designed for them to win in.',
 ".@mt8_9 fathers who can't pay child support is the only noncombatant group who can be …",
 "i said it for months. didn't know how good mckinnon was...",
 'the amount of times i wonder where people got their licenses is directly proportional to the number of female drivers o…',
 'you should be able to save from here...right click or hold down on them and you should get a save option.',
 '...as a result there is a possible supply of 100% of the population for all "female" jobs vs. 

### Word vs Hybrid

In [24]:
# Positive test tweets on which the word and hybrid CNNs disagree.
hybrid_right = []  # hybrid predicts 1, word predicts 0
hybrid_wrong = []  # hybrid predicts 0, word predicts 1
for i in range(len(baseline_pred)):
    # Only positive-labelled samples are of interest here.
    if not (y_test[i] == 1):
        continue
    text = data["x_test"][i]
    if word_pred[i] == 0 and hybrid_pred[i] == 1:
        hybrid_right.append(text)
    elif word_pred[i] == 1 and hybrid_pred[i] == 0:
        hybrid_wrong.append(text)
    
                             

In [25]:
hybrid_right

['do the killa models give a killa dessert?? mkr',
 "omg kat! calling them suckers? i can't wait to see what they said about your slop... who's the suckers now? ha-ha!!! mkr...",
 "mkr annie's never cooked on a bbq before. see...she is an alien.",
 'from zero to "cunt" in eight seconds. too bad that word really means "i\'m an insecure whiny baby" and says nothing about me.',
 'the feminazi pigs will always run into a battering ram and pulpify, because this is an environment never designed for them to win in.',
 ".@mt8_9 fathers who can't pay child support is the only noncombatant group who can be …",
 'you should be able to save from here...right click or hold down on them and you should get a save option.',
 'kat so full of grace. not. mkr',
 "blameonenotall for all feminist's talk of not hating men,they sure come out in force when men dare speak about what affects em.",
 "have you ever had a female judge outraged at you for not paying child support for someone else's",
 'your cooking

In [26]:
len(hybrid_right)

36

In [27]:
hybrid_wrong

[".@notchrissmith free market. they get what they negotiate. producers feel it's fair or not. the actor/actress can accept or not. askhermore",
 'cuntandandre cuntandandre cuntandandre, here we go again, you strategic arseholes! mkr',
 'katie and nikki go down. maybe not well. but down none the less. mkr',
 "sexist is when people don't go because it's women. people aren't going because it's of a poor qua…",
 'kat is the king joffrey of mkr',
 "females need to stop thinking that they'll be treated exactly the same as males...society wont let it happen factsoflife n…",
 "ladies: i don't think that you can't drive, i just know that you can't park or back out of a parking space. facts notsex…",
 'if being pro-due process makes you pro-rape, does being anti-death penalty make you pro-murder?',
 'really you acted surprise? wow thats low kat mkr',
 'these are deash guns, cunt.',
 'i ask this every year. so looking forward to the older blonde. cue geggy tah "whoever you are" mkr',
 'every girl

In [36]:
len(hybrid_wrong)

22

In [14]:
def positive_agreement(pred_a, pred_b, labels):
    """Count positive-labelled samples and how often two models agree on them.

    Args:
        pred_a, pred_b: per-sample predictions of the two models, indexable
            by position; `len(pred_a)` sets how many samples are examined.
        labels: ground-truth labels, indexable by the same positions.

    Returns:
        (n_agree, n_positive): the number of samples with label 1 on which
        both models output the same value, and the number of samples with
        label 1.
    """
    n_agree = 0
    n_positive = 0
    for i in range(len(pred_a)):
        if labels[i] == 1:
            n_positive += 1
            if pred_a[i] == pred_b[i]:
                n_agree += 1
    return n_agree, n_positive


# Counters kept from the original cell; nothing in this cell updates them.
hybrid_also_correct = 0
word_also_correct = 0

# Hybrid vs char. Note that `correct` holds the number of positive test
# samples, not the number of correct predictions. Call positive_agreement
# with another pair of models instead of editing this cell in place.
agreement, correct = positive_agreement(hybrid_pred, char_pred, y_test)
            

In [11]:
# char/word: share of positive test samples on which the two models agree.
# Computed in this cell so the value does not depend on which model pair the
# counting cell was last run with.
positive_ids = [i for i in range(len(hybrid_pred)) if y_test[i] == 1]
sum(1 for i in positive_ids if char_pred[i] == word_pred[i]) / len(positive_ids)

0.853448275862069

In [13]:
# hybrid/word: share of positive test samples on which the two models agree.
# Computed in this cell so the value does not depend on which model pair the
# counting cell was last run with.
positive_ids = [i for i in range(len(hybrid_pred)) if y_test[i] == 1]
sum(1 for i in positive_ids if hybrid_pred[i] == word_pred[i]) / len(positive_ids)

0.9

In [16]:
# hybrid/char: share of positive test samples on which the two models agree.
# Computed in this cell so the value does not depend on which model pair the
# counting cell was last run with.
positive_ids = [i for i in range(len(hybrid_pred)) if y_test[i] == 1]
sum(1 for i in positive_ids if hybrid_pred[i] == char_pred[i]) / len(positive_ids)

0.8706896551724138

In [22]:
# Positive test tweets that all three CNNs (char, word, hybrid) label as negative.
all_wrong = [
    data["x_test"][i]
    for i in range(len(char_pred))
    if all(model_pred[i] == 0 for model_pred in (char_pred, word_pred, hybrid_pred))
    and y_test[i] == 1
]
    
                             

In [23]:
all_wrong

['in the past 12h, is seeing the most targeted abuse, with 194 (36 oc/158 rt) gg hashtaged tweets mentioning her. congrats!',
 "oh, weird. i'm surprised it didn't offer twitter.",
 'i honestly think kat has mental health issues. mkr',
 'who is to blame for that?',
 "honestly, i didn't look...because i'd bet my life we know the answer.",
 'it took the priest a year to convince her to tell the cops. he served 5 years out of 20. she got 26 months.',
 '3 hours away, probably fucking some dude instead of making me a sandwich.',
 '...i learned that doing so made me a "misogynist".',
 'usually takes 20 to 120 minutes for redscarebot...',
 'ash found her inner bogun whistle fingersinthemouth ewwww mkr',
 'like your picture? hahaha nice skin and hair ya slut.',
 'if you look at you see tweets of pure hatred. if you look at you see a few light stupidity a…',
 '"climb your beanstalk back to hell"',
 'we’re gonna get stuck with this friggin head kat mkr 😩',
 'too much drinking. too many porn stars