In [27]:
import pandas as pd
import numpy as np
import importlib

numbers = ['1','2','3','4','5','6','7','8','9','0']
number_words = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']
selection = ['multi']
baseline = importlib.import_module('transformer-baseline-task-1')

def contains_number_word(inputString):
    return any(word in number_words for word in inputString.split())

def has_numbers(inputString):
    return any(char.isdigit() for char in inputString)

df = pd.read_json('./data/validation.jsonl', lines=True)
df['postText'] = df['postText'].apply(lambda p: p[0])
df['spoiler_text_contains_numbers'] = df['postText'].apply(lambda p: has_numbers(p))
df['spoilerType'] = np.where(df['spoiler_text_contains_numbers'] == True, '[multi', '')

In [28]:
mask_multi_pred = df.spoiler_text_contains_numbers.apply(lambda x: x)
mask_multi_act = df.tags.apply(lambda x: any(item for item in selection if item in x))
mask_multi_not_pred = df.spoiler_text_contains_numbers.apply(lambda x: not x)
mask_multi_not_act = df.tags.apply(lambda x: any(item for item in selection if item not in x))

true_positive = len(df[mask_multi_pred & mask_multi_act].index)
false_positive = len(df[mask_multi_pred & mask_multi_not_act].index)
true_negative = len(df[mask_multi_not_pred & mask_multi_not_act].index)
false_negative = len(df[mask_multi_not_pred & mask_multi_act].index)

print('TP: ', true_positive)
print('FP: ', false_positive)
print('TN: ', true_negative)
print('FN: ', false_negative)

accuracy = (true_positive + true_negative) / (true_positive + false_positive + true_negative + false_negative)
print('Accuracy: ', accuracy)

TP:  66
FP:  76
TN:  581
FN:  77
Accuracy:  0.80875


In [34]:
df_predictable_by_transformer = df[mask_multi_not_pred]
results = pd.DataFrame(baseline.predict(df_predictable_by_transformer[0:3]))

results

 33%|███████████████████████████████████████████████████▋                                                                                                       | 1/3 [00:00<00:00,  5.20it/s]
  query_layer = query_layer / torch.tensor(scale, dtype=query_layer.dtype)
  p2c_att = torch.matmul(key_layer, torch.tensor(pos_query_layer.transpose(-1, -2), dtype=key_layer.dtype))
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:19<00:00, 19.79s/it]


Unnamed: 0,uuid,spoilerType
0,6dc7ddef-4e8e-4a6b-9296-526377518071,passage
1,435b24de-56f6-4d4e-9c38-54b8e0630aac,phrase
2,8091ba93-6376-473a-9117-01d6cf0507e4,passage


In [35]:
merged = pd.merge(df, results, left_on='uuid', right_on='uuid', how='left')
merged['spoilerType_y'] = merged.apply(lambda x: [x['spoilerType_x']] if x['spoiler_text_contains_numbers'] == True else [x['spoilerType_y']], axis=1)
cleaned = merged.drop('spoilerType_x', axis=1)

mask_tag_equals_spoiler_type = cleaned.tags.apply(lambda t: t == spoiler_type_y)
true_positive = len(df[mask_tag_equals_spoiler_type].index)
accuracy = true_positive / len(df.index)
accuracy

Unnamed: 0,uuid,postId,postText,postPlatform,targetParagraphs,targetTitle,targetDescription,targetKeywords,targetMedia,targetUrl,provenance,spoiler,spoilerPositions,tags,spoiler_text_contains_numbers,spoilerType_y
0,6dc7ddef-4e8e-4a6b-9296-526377518071,800048986762423_885081814925806,Five Nights at Freddy’s Sequel Delayed for Wei...,Facebook,[Five Nights at Freddy’s creator Scott Cawthon...,Five Nights at Freddy’s Sequel Delayed for Wei...,Five Nights at Freddy's creator Scott Cawthon ...,,[https://gamerant.com/wp-content/uploads/five-...,https://gamerant.com/five-nights-at-freddys-se...,"{'source': 'anonymized', 'humanSpoiler': 'They...",[some of the plot elements are so disturbing t...,"[[[2, 158], [2, 236]]]",[passage],False,[passage]
1,435b24de-56f6-4d4e-9c38-54b8e0630aac,4jp20e,Why Arizona Sheriff Joe Arpaio’s fate could ha...,reddit,[© REUTERS/Laura Segall Maricopa County Sherif...,Why Arizona Sheriff Joe Arpaio’s fate could ha...,"<p>A single word — ""intentionally"" — could tra...",,[https://archive.is/0eGBT/bbf6d79b366fc7e75b8b...,https://archive.is/0eGBT,"{'source': 'anonymized', 'humanSpoiler': '""Int...","[""intentionally"", could transform a court case...","[[[0, 197], [0, 212]], [[0, 215], [0, 328]]]",[multi],False,[phrase]
2,8091ba93-6376-473a-9117-01d6cf0507e4,857559227384160256,Here’s how much you should be tipping your hai...,Twitter,[Here’s how much you should be tipping your ha...,Here’s how much you should be tipping your hai...,Remembering how much you’re supposed to tip an...,,,,"{'source': 'anonymized', 'humanSpoiler': None,...",[20%],"[[[3, 58], [3, 61]]]",[phrase],False,[passage]
3,8b713277-0e5b-4873-a216-b650f21f3b4c,389091583213453312,"""Harry Potter"" alums reunite for new movie",Twitter,[The mythology of punk music's evolution can b...,"Alan Rickman & Rupert Grint On 'CBGB,' Reuniti...",The mythology of punk music's evolution can be...,"Alan Rickman,Hilly Kristal,new rupert grint mo...",[http://s.m.huffpost.com/assets/Logo_Huffingto...,http://huff.to/1ccNwKJ,"{'source': 'anonymized', 'humanSpoiler': 'Alan...","[Alan Rickman & Rupert Grint, CBGB]","[[[-1, 0], [-1, 27]], [[0, 98], [0, 102]]]",[multi],False,[nan]
4,a2f91b65-c36c-481f-92b1-2fc77d6411fc,744740593046274048,A man swallowed a microSD card and you won't b...,Twitter,[PetaPixel is one of my favorite blogs. The wr...,Man swallowed a microSD card and you won't bel...,PetaPixel is one of my favorite blogs. The wri...,,[https://cdn0.vox-cdn.com/uploads/chorus_image...,http://www.theverge.com/circuitbreaker/2016/6/...,"{'source': 'anonymized', 'humanSpoiler': 'This...",[a man who swallowed a 64GB microSD card and t...,"[[[1, 34], [1, 108]]]",[passage],False,[nan]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,1189d343-42eb-47e7-8395-ff978a683875,428006164904034305,This is what happens when you leave a hotel cl...,Twitter,[Instead of encountering a mound of dirty towe...,This Is What Happens When You Leave A Hotel Cl...,Instead of encountering a mound of dirty towel...,"givebackfilms,give back films,video,random act...",[http://s.m.huffpost.com/assets/Logo_Huffingto...,http://huff.to/1ebARdm,"{'source': 'anonymized', 'humanSpoiler': 'She ...",[The video below shows the stunned cleaner ini...,"[[[3, 0], [3, 150]]]",[passage],True,[multi]
796,7912282b-137b-4098-875d-8ad9f19354a8,806153730206892032,This Texas GOP elector announces that he won't...,Twitter,[A Republican elector in Texas says he will no...,Texas GOP elector announces he won't vote for ...,The Electoral College vote for president on De...,"donald trump, texas, electoral college, faithl...",,,"{'source': 'anonymized', 'humanSpoiler': None,...",[Christopher Suprun],"[[[1, 45], [1, 63]]]",[phrase],False,[nan]
797,1fdf71e8-ec14-4c3b-a7c5-ca678c6f8ccb,847331053991813120,This beauty queen cured her acne with one diet...,Twitter,[Her inspirational journey is encouraging othe...,UK beauty queen cured her severe acne with one...,A beauty pageant contestant has taken to Insta...,"acne, Skincare, beauty, Beauty pageant, Dermat...",,,"{'source': 'anonymized', 'humanSpoiler': None,...","[Rachel Crawley, High fat vegan plant based di...","[[[2, 144], [2, 158]], [[6, 56], [6, 124]]]",[multi],False,[nan]
798,17f6b540-cf8d-4ddf-8321-1c9ce2315d71,788056531304583168,WikiLeaks' Julian Assange Reported Dead,Twitter,"[On 16 October 2016, WikiLeaks posted a series...","WikiLeaks’ Julian Assange Isn’t Dead, Just Off...",A series of mysterious tweets from WikiLeaks l...,"dead man's switch, julian assange, wikileaks",[http://static.snopes.com/app/themes/snopes-th...,http://trib.al/leR8lNw,"{'source': 'anonymized', 'humanSpoiler': 'It w...",[Julian Assange’s internet link has been inten...,"[[[11, 0], [11, 78]]]",[passage],False,[nan]
