In [1]:
%load_ext autoreload
%autoreload 2


import pandas as pd
import spacy
# Load the large English spaCy pipeline once; later cells reuse `nlp` for
# parsing the texts, building the matcher and running the baseline.
nlp = spacy.load("en_core_web_lg")

from os import getcwd, listdir
from os.path import join, dirname, isfile

# The repository root is the parent of the current working directory
# (the notebook is expected to be started from a sub-folder of the repo).
PATH_REPO = dirname(getcwd())

# Sub-folders of the repository used throughout the notebook.
PATH_UTILS, PATH_DATA, PATH_CONFIG, PATH_RESULTS = [
    join(PATH_REPO, folder)
    for folder in ('utils', 'data', 'config', 'results')
]

import sys

# Make the project-local modules in utils/ importable (baseline, metrics).
# The membership check keeps the cell idempotent: re-running it no longer
# appends the same directory to sys.path again and again.
if PATH_UTILS not in sys.path:
    sys.path.append(PATH_UTILS)


from baseline import load_t_matcher, run_baseline
from metrics import pipeline_report

from sklearn.metrics import precision_recall_fscore_support as score

import numpy as np

# Show full cell contents (no truncation) so whole tweets are readable in the
# result tables. The fully-qualified option name is used because abbreviated
# keys rely on pattern matching and can become ambiguous in later pandas versions.
pd.set_option('display.max_colwidth', None)

In [13]:
# Read the test set. The file's own first row is skipped and replaced by the
# explicit column names below.
# NOTE(review): the rendered output shows an extra leading index column, so the
# file appears to hold three columns per row — confirm against test_set.csv.
data = pd.read_csv(
    join(PATH_DATA, "test_set.csv"),
    names=['text', 'label'],
    skiprows=1,
    engine="python",
)
# NOTE(review): label binarisation is disabled; the metrics cell further down
# compares `label` against 0/1 predictions, so labels must already be numeric.
# data['label'] = np.where(data['label']=="Gratitude", 1, 0)

# Force every text to str so missing values cannot reach the spaCy pipeline as floats.
data['text'] = data['text'].astype(str)
texts = data['text'].tolist()
data

Unnamed: 0,text,label
0,Another great IPC meeting. A huge thank you to our amazing Mo and Sarah USER USER who have provided consistent and compassionate infectioncontrol leadership throughout COVID19 Pandemic. 'IPC is everyone's business' is your legacy We all will miss you â¤ï¸,
1,Weâ€™re all set for this evenings USER first up weâ€™re hearing from keynote speaker Mel Smith CEO of USER talking about the huge rise in online food shopping thanks to COVID19 CFL21,
2,"The vaccine programme, the largest in NHS history, is currently moving through Scots who are between 70 and 79 years old, with progress speeding up thanks to the launch of mass vaccination centres such as those at the P&amp;J Live in Aberdeen and Edinburgh International Conference",
3,Had a text offering me the vaccination. Booked in for Friday at Clarendon Recreation Centre. USER NHS workers and all involved are doing an amazing job of getting the vaccine out in Salford. Thank you for your efforts. SpiritOfSalfordâ¤ï¸,
4,Many thanks to USER for highlighting the impact of smell and taste disorders with this powerful video. I founded USER with USER in 2012 after losing my sense of smell through head injury. Weâ€™re here to provide support &amp; information to anyone in need.,
...,...,...
16764,"USER There are models of edtech integration that build up to learning activities that can only occur online. I suggest that several skirt the boundary of web 3. Sadly, often the most enthusiastic teacher's hands are tied by school policy and exam deadlines, and never get that far.",
16765,"Yâ€™all have forced me to think about politics and by â€œthink about politicsâ€ I mean hoping, praying and waiting for the day weâ€™re discussing parties that arenâ€™t UN* and PN* ðŸ¥´ðŸ¥´",
16766,From the time a Speaker can say that there is to be no debate on the issueâ€¦just the casting of a voteâ€¦I already wrote this fiasco as utter rubbishâ€¦and Iâ€™m genuinely not surprised by what took place today. Love that for us as tax-paying citizens.,
16767,This town would have no more residents by the time Love &amp; Joe done yes,


In [14]:
# Parse all texts with the spaCy pipeline in one batched pass; list() materialises
# the results so the documents can be iterated in the following cells.
docs = list(nlp.pipe(texts))

In [15]:
# Build the matcher through the project helper, handing it the config directory,
# the pattern file name and the spaCy pipeline.
t_mat = load_t_matcher(PATH_CONFIG, 't_patterns.json', nlp)
# A second matcher is currently disabled.
# NOTE(review): load_p_matcher is not imported in this notebook — import it before re-enabling.
# p_mat = load_p_matcher(PATH_CONFIG, 'ph_patterns.json', nlp)

In [16]:
# Run the baseline on every parsed document. Each call returns a triple that is
# unpacked below as (prediction flag, matched pattern names, matched sequences).
baseline_outputs = [run_baseline(nlp, parsed, t_mat) for parsed in docs]

predictions = [flag for flag, _names, _spans in baseline_outputs]
patterns = [names for _flag, names, _spans in baseline_outputs]
sequences = [spans for _flag, _names, spans in baseline_outputs]

# Store the results next to the texts; truthy flags become 1, falsy ones 0.
data['prediction'] = np.where(predictions, 1, 0)
data['patterns'] = patterns
data['matching sequences'] = sequences


In [17]:
# Preview the first rows, now including the prediction, patterns and
# matching sequences columns.
data.head()

Unnamed: 0,text,label,prediction,patterns,matching sequences
0,Another great IPC meeting. A huge thank you to our amazing Mo and Sarah USER USER who have provided consistent and compassionate infectioncontrol leadership throughout COVID19 Pandemic. 'IPC is everyone's business' is your legacy We all will miss you â¤ï¸,,1,[Explicit Gratitude],[thank you]
1,Weâ€™re all set for this evenings USER first up weâ€™re hearing from keynote speaker Mel Smith CEO of USER talking about the huge rise in online food shopping thanks to COVID19 CFL21,,1,[Explicit Gratitude],[thanks to]
2,"The vaccine programme, the largest in NHS history, is currently moving through Scots who are between 70 and 79 years old, with progress speeding up thanks to the launch of mass vaccination centres such as those at the P&amp;J Live in Aberdeen and Edinburgh International Conference",,1,[Explicit Gratitude],[thanks to]
3,Had a text offering me the vaccination. Booked in for Friday at Clarendon Recreation Centre. USER NHS workers and all involved are doing an amazing job of getting the vaccine out in Salford. Thank you for your efforts. SpiritOfSalfordâ¤ï¸,,1,"[Expressing Appreciation3, Explicit Gratitude]","[amazing job, Thank you]"
4,Many thanks to USER for highlighting the impact of smell and taste disorders with this powerful video. I founded USER with USER in 2012 after losing my sense of smell through head injury. Weâ€™re here to provide support &amp; information to anyone in need.,,1,[Explicit Gratitude],[thanks to]


In [91]:
# Macro-averaged scores: each class contributes equally, regardless of its size.
# `support` is unpacked only to match the four-value return; it is not used below.
precision, recall, fscore, support = score(
    y_true=data['label'],
    y_pred=data['prediction'],
    average='macro',
)
print("Precision:\t", precision, "\n", "Recall:\t", recall, "\n", "F1:\t", fscore)

Precision:	 0.9177561568865917 
 Recall:	 0.9173513075676927 
 F1:	 0.9175503145677995


## Check the false positives (label 0, predicted 1)

In [92]:
# Rows labelled 0 that the baseline flagged as 1.
false_positives = data[(data['label'] == 0) & (data['prediction'] == 1)]
print(len(false_positives))
# Display at most the first 68 rows.
false_positives[0:68]

21


Unnamed: 0,text,label,prediction,patterns,matching sequences
24,"USER I hear you're the best, I fly 2x a week &amp; USER has let me down big. Get me to DC by 2:30pm tom &amp; I'll give you my money instead",0,1,[],[]
30,"USER You’re very kind, but it’s 9:32pm&amp;I need to relax. I’ll be happy to connect tomorrow through DM. Can’t say your CS isn’t prompt! Thank you.",0,1,[Explicit Gratitude],[Thank you]
39,"USER That's very kind of you, but this was a while back. Amazon already fixed the issue, even though it wasn't their fault the delivery went to the wrong house. :)",0,1,[Emphasizing Apperciation2],[kind of you]
45,"Shout out to USER for lying to me about my plan, when trying to get it fixed telling me there is nothing to do about it and to this day having the worst customer service out there. You're the best. :)",0,1,[],[]
68,"USER You misunderstand, I'm just asking if its possible, I haven't tried it yet (still a couple of days on my trial),many thanks for helping :)",0,1,[Explicit Gratitude],[thanks for]
273,"USER i love iOS 11. it’s so fun having a phone that freezes, restarts, and messes up every ten minutes. thank you for being incompetent :)",0,1,[Explicit Gratitude],[thank you]
277,USER Dear Hulu: Please get rid of the new interface. The new interface is TRASH! Thank you for your prompt attention to this matter. Long Time Customer hulu newinterface epicfail,0,1,[Explicit Gratitude],[Thank you]
279,"USER Though I have amazon India account, for next few months, i'll be in South Korea. Will be great if there is a way amazon india can deliver it to here ;) Thank you for the help 😀",0,1,[Explicit Gratitude],[Thank you]
311,"USER hello, is there a possibility to request my oyster travel history older than three months? I need data from July. Thanks a lot :)",0,1,[Explicit Gratitude],[Thanks a]
313,USER I was suppose to land in San Fran at 8:30. I didn’t land u til after midnight. Thanks a lot.,0,1,[Explicit Gratitude],[Thanks a]


## Check the false negatives (label 1, predicted 0)

In [93]:
# Rows labelled 1 that the baseline missed (predicted 0).
false_negatives = data[(data['label'] == 1) & (data['prediction'] == 0)]
print(len(false_negatives))
false_negatives

22


Unnamed: 0,text,label,prediction,patterns,matching sequences
2,USER Appreciate it. We love USER and only want to see her succeed in the future... so appreciate you letting me be your (somewhat worried) canary in the coal mine ;),1,0,[],[]
25,USER wanted to give a shoutout to Jeremy at the Chipotle on 121 4th Street 135 in San Francisco. I ordered ahead and it wasn't ready but Jeremy jumped right in to make sure 1 i got my food and 2 I made my train. please recognize him! &amp; let me know :),1,0,[],[]
26,Made my day finding out USER have vegan Belgium Buns !! Really hope they expand more with vegan goodies :D,1,0,[],[]
29,USER Made my day :D,1,0,[],[]
32,"USER USER Ah, my bad! Anyway, a very kind and resourceful man to contact me immediately! :)",1,0,[],[]
42,USER Wow! Hi. I didn’t expect a response but thankful for it. AMZL delivery. It Was originally to arrive yesterday. Then postponed to today. The amazon tracker said it was still out for delivery and would arrive by 8pm. At 11:30pm it still showed the pkg was out for delivery.,1,0,[],[]
53,"USER So punny;) I ❤️ you, Southwest. Thankful for you today especially!!! BestAirlineEver",1,0,[],[]
55,USER That's very kind but I'm 21 hours into travel from Abu Dhabi right now. So very tired and cranky. I'm sure we'll be chatting again around 3pm :). Thanks,1,0,[],[]
56,"USER That was a game I will NEVER forget! As a lifelong Cubs fan, it was worth the wait! :) *TreBrewbaker",1,0,[],[]
59,USER You’re the best! Cheers :),1,0,[],[]


## Check the true positives (label 1, predicted 1)

In [94]:
# Rows labelled 1 that the baseline also predicted as 1.
true_positives = data[(data['label'] == 1) & (data['prediction'] == 1)]
print(len(true_positives))
true_positives

210


Unnamed: 0,text,label,prediction,patterns,matching sequences
0,USER Thank you for the reply! I appreciate the help :) the book is The Art Of Thor: Ragnarok:,1,1,"[Explicit Gratitude, Explicit Appreciation]","[Thank you, I appreciate]"
1,USER Thank you for the reply Goldie. It's not just sainsburys ... I haven't seen them anywhere this year in supermarkets. Not even in the mixed nut bags :)) Appreciate your tweet though. Have a good weekend :),1,1,[Explicit Gratitude],[Thank you]
3,"USER Thanks so much. I meant to tweet not message, so I appreciate the quick reply!! Enjoy your weekend. :-)",1,1,"[Explicit Gratitude, Explicit Appreciation]","[Thanks so, I appreciate]"
4,USER USER Dear USER USER USER thank you for taking action and getting back to me. Case has been solved thank you for your quick reply. Happy Weekend :),1,1,"[Explicit Gratitude, Explicit Gratitude]","[thank you, thank you]"
5,USER Thank you for your help. I found it and it's currently downloading. Appreciate the assistance :),1,1,[Explicit Gratitude],[Thank you]
...,...,...,...,...,...
339,USER USER USER USER Thanks! Appreciate the effort &amp; the update. It'd better be damn quick. I still got some binging of Catfish to do. :),1,1,[Explicit Gratitude],[Thanks!]
340,"USER No I get it, just thought I complain a little cause when I signed up for “limited ads” I thought maybe 3-4 every once in awhile not 2 long ads in a row. But I appreciate you responding! :)",1,1,[Explicit Appreciation],[I appreciate]
343,"USER That's a shame, thanks for looking though! That color combo is what I'm looking for though. Ah well ;) appreciate the attempt!",1,1,[Explicit Gratitude],[thanks for]
346,"USER Thanks, but I've already checked and there is not a single store in my country, I appreciate the effort :D",1,1,"[Explicit Gratitude, Explicit Appreciation]","[Thanks,, I appreciate]"


## Check the true negatives (label 0, predicted 0)

In [95]:
# Rows labelled 0 that the baseline also predicted as 0.
true_negatives = data[(data['label'] == 0) & (data['prediction'] == 0)]
print(len(true_negatives))
true_negatives

277


Unnamed: 0,text,label,prediction,patterns,matching sequences
23,USER I am having trouble printing on envelopes: printer does not recognize paper size hpsa hpprnt,0,0,[],[]
36,USER please recognize flight attendant COCO 4 buying me a drink when my voucher had expired! Orlando -&gt; Denver flight!,0,0,[],[]
46,"Tried to contact Spotify multiple times about the issue I'm having, to no avail. I keep getting ""Error Code: 3"" when I try to log in with my Facebook account, and it is saying it doesn't recognize my account when I have definitely signed up before. USER Can you help?",0,0,[],[]
48,"USER after i click ""nashville"" inside the treasure truck menu it just sits here :)",0,0,[],[]
60,we said we weren’t gonna go black friday shopping but here we are standing in walmart @ 2 am :) thankful futurehubby 💚💍,0,0,[],[]
...,...,...,...,...,...
607,USER 😱😱😱 DM us Becca - we need to get to the bottom of this!,0,0,[],[]
608,USER We'd hate to see you go! We understand the frustration and will share your feedback with our team.,0,0,[],[]
609,"USER We would be too! We're so sorry to hear about the trouble you've been having, Tim. What device are you on? Running into errors?",0,0,[],[]
610,"USER Terribly sorry for the agitation regarding your husband's account, Ailsa! What information were we able to provide when you contacted us about it? ^JZ",0,0,[],[]
