# Meta Automation


## Natural language processing

Emotion analysis

- Paper: https://www.aclweb.org/anthology/D18-1404.pdf
- Dataset: https://www.kaggle.com/praveengovi/emotions-dataset-for-nlp

In [31]:
import pandas as pd

# the training file has no header and holds one `<text>;<emotion>` pair per
# line, so `read_csv()` with a ';' separator is enough to parse it
df_train = pd.read_csv(
    'emotions/train.txt',
    sep=';',
    names=['text', 'emotion'],
)
df_train

Unnamed: 0,text,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
15995,i just had a very brief time in the beanbag an...,sadness
15996,i am now turning and i feel pathetic that i am...,sadness
15997,i feel strong and good overall,joy
15998,i feel like this was such a rude comment and i...,anger


In [32]:
df_train['emotion'].value_counts()

joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: emotion, dtype: int64

In [33]:
# prefix emotion labels based on fastText's expected format (`__label__<name>`)
# only labels that are not prefixed yet are changed, so re-running this cell
# cannot turn `__label__joy` into `__label____label__joy`
df_train['emotion'] = df_train['emotion'].where(
    df_train['emotion'].str.startswith('__label__', na=False),
    '__label__' + df_train['emotion'],
)
df_train['emotion'].value_counts()

__label__joy         5362
__label__sadness     4666
__label__anger       2159
__label__fear        1937
__label__love        1304
__label__surprise     572
Name: emotion, dtype: int64

In [46]:
from numpy import savetxt

# positively leverage "leaky abstraction" by using Numpy `savetxt()` as a
# plain line writer: with `fmt='%s'` every row becomes "<emotion> <text>"
# output DataFrame as text file that each line would look like:
# __label__joy i feel strong and good overall
# fastText reads its input as UTF-8, so the encoding is set explicitly
# instead of being left to Numpy's default
savetxt('emotions_train.txt', df_train[['emotion', 'text']].values, fmt='%s', delimiter=' ', encoding='utf-8')

### Pre-process abstraction

In [42]:
def pre_process(fname: str, output: str) -> None:
    """Convert a `text;emotion` file into fastText's supervised input format.

    Every input line is parsed as `<text>;<emotion>` (no header row) and
    written to `output` as `__label__<emotion> <text>`, one example per line.

    Args:
        fname: path of the semicolon-separated input file.
        output: path of the text file to write.
    """
    # load text; force string columns so purely numeric labels or texts
    # are not parsed as numbers and can still be concatenated below
    df = pd.read_csv(fname, sep=';', names=['text', 'emotion'], dtype=str)
    # prefix emotions with __label__ for fastText standard
    df['emotion'] = '__label__' + df['emotion']
    # output as fastText expected text file; fastText reads UTF-8, so the
    # encoding is explicit rather than left to Numpy's default
    savetxt(output, df[['emotion', 'text']].values, fmt='%s', delimiter=' ', encoding='utf-8')

In [43]:
# convert the validation and test splits the same way as the training split
pre_process(fname='emotions/val.txt', output='emotions_val.txt')
pre_process(fname='emotions/test.txt', output='emotions_test.txt')

In [47]:
import fasttext

# train a supervised fastText classifier on the prepared training file;
# passing `autotuneValidationFile` switches on fastText's hyper-parameter
# autotuning, which is evaluated against the validation file
# NOTE(review): no autotune duration is given, so fastText's own default
# time budget applies -- confirm this is intended for re-runs
model = fasttext.train_supervised(
    input='emotions_train.txt',
    autotuneValidationFile='emotions_val.txt',
)

In [48]:
model.test('emotions_test.txt')

(2000, 0.883, 0.883)

In [50]:
model.predict('lol')

(('__label__joy',), array([0.99999237]))

In [57]:
text = "Sorry honey, I tried to get flowers but now I'm just drunk in sitting in a pile of dandelion leafs. Happens to the best of us"
model.predict(text)

(('__label__joy',), array([0.47063491]))

In [58]:
model.predict(text.lower())

(('__label__sadness',), array([0.98739922]))

In [59]:
model.predict(text.lower().replace("'", ''))

(('__label__sadness',), array([0.98539132]))

In [126]:
# predict on the lowercased, apostrophe-free text (same clean-up as the cells above)
labels, scores = model.predict(text.lower().replace("'", ''))
# show only the first label without its `__label__` prefix, score rounded to 3 decimals
print(labels[0].split('__label__')[-1], round(scores[0], 3))

sadness 0.985


### Post-process abstraction

In [127]:
def emotion_score(model, text):
    """Predict the top emotion for a piece of text.

    The text is lowercased and stripped of apostrophes before prediction,
    and line breaks are folded into spaces so multi-line input is accepted.

    Args:
        model: object whose `predict(str)` returns a `(labels, scores)` pair,
            e.g. the fastText model trained in this notebook.
        text: raw input text.

    Returns:
        dict with `emotion` (first label without the `__label__` prefix)
        and `score` (first score as a plain float rounded to 3 decimals).
    """
    # fastText's `predict()` raises on strings containing '\n', so line
    # breaks are replaced by spaces instead of being passed through
    cleaned = text.lower().replace("'", '').replace('\n', ' ')
    labels, scores = model.predict(cleaned)
    # cast to a built-in float so the result does not carry a numpy scalar
    return {'emotion': labels[0].split('__label__')[-1], 'score': round(float(scores[0]), 3)}

In [71]:
emotion_score(model, "Sorry honey, I tried to get flowers but now I'm just drunk in sitting in a pile of dandelion leafs. Happens to the best of us")

{'emotion': 'sadness', 'score': 0.985}

In [72]:
emotion_score(model, 'Sounds like a great cause...also have two teenagers so I appreciate the cause and underlying issues too.')

{'emotion': 'joy', 'score': 0.74}

In [76]:
emotion_score(model, "I have a mild fascination with city planning (ask me for good content sources if you're also into that), but stumbled on to two great vids on the TTC's subway evolution and GO's rail lines.")

{'emotion': 'joy', 'score': 0.997}

### Build a case

In [80]:
# download a youtube video's CC
import requests

# CC obtained from https://www.youtube.com/watch?v=CzSMC5rWvos
# inspect network activity and search for `timedtext`
# copy the URL from the GET request below
# NOTE: the URL carries `expire=` and `signature=` parameters, so it appears to
# be time-limited; re-copy it from the browser if the request starts failing
cc_response = requests.get('https://www.youtube.com/api/timedtext?v=CzSMC5rWvos&asr_langs=de,en,es,fr,it,ja,ko,nl,pt,ru&caps=asr&exp=xftt&xorp=true&xoaf=5&hl=en&ip=0.0.0.0&ipbits=0&expire=1615438222&sparams=ip,ipbits,expire,v,asr_langs,caps,exp,xorp,xoaf&signature=BC765D899C9DDFCA652A7CA6F9FDFA41A81EA48C.2188568EF5C2E4920AC3214117736DD0295AE2E2&key=yt8&lang=en&fmt=json3&xorb=2&xobt=3&xovt=3', timeout=30)
# surface HTTP errors (e.g. an expired URL) directly instead of failing later
# with an opaque JSON decoding error or a silently empty DataFrame
cc_response.raise_for_status()
# `cc` is the list of caption events from the JSON payload
cc = cc_response.json().get('events', [])
df_cc = pd.DataFrame(cc)
df_cc

Unnamed: 0,tStartMs,dDurationMs,segs
0,190,5680,[{'utf8': 'I'm often asked -- and occasionally...
1,5870,5309,"[{'utf8': 'And it's like, you know, the only ""..."
2,11179,2271,[{'utf8': 'I don't associate with movements.'}]
3,13450,3180,"[{'utf8': 'I'm not an ""ism.""'}]"
4,16630,2950,[{'utf8': 'I just - I think for myself.'}]
5,19580,9000,[{'utf8': 'The moment when someone attaches to...
6,28580,4779,[{'utf8': 'and all the rest of the philosophy ...
7,33359,4101,[{'utf8': 'a conversation they will assert tha...
8,37460,3750,[{'utf8': 'to know about you because of that a...
9,41210,1789,[{'utf8': 'And that's not the way to have a co...


In [137]:
# a bit of cleanup
def parse_segs(segs):
    """Flatten one caption event's `segs` value into a single clean line.

    The `utf8` text of every segment is concatenated, lowercased, stripped
    of apostrophes, and line breaks are replaced by spaces. For a
    single-segment event this is exactly the text of that segment.

    Args:
        segs: list of segment dicts (each expected to carry a `utf8` key);
            anything that is not a non-empty list/tuple yields ''.

    Returns:
        The normalised caption text, or '' when there is nothing to parse.
    """
    # events without a `segs` entry show up as NaN in the DataFrame, and an
    # empty list has no first segment: both mean "no text" rather than a crash
    if not isinstance(segs, (list, tuple)) or not segs:
        return ''
    # join all segments instead of silently dropping everything after the first
    text = ''.join((seg.get('utf8') or '') for seg in segs)
    return text.lower().replace("'", '').replace('\n', ' ')

# store the cleaned caption line next to the raw segments ...
df_cc['text'] = df_cc['segs'].map(parse_segs)
# ... and continue with a frame that holds only that text column
df = df_cc.loc[:, ['text']]
df

Unnamed: 0,text
0,im often asked -- and occasionally in an accus...
1,"and its like, you know, the only ""ist"" i am is..."
2,i dont associate with movements.
3,"im not an ""ism."""
4,i just - i think for myself.
5,the moment when someone attaches to a philosop...
6,and all the rest of the philosophy that goes w...
7,a conversation they will assert that they alre...
8,to know about you because of that association.
9,and thats not the way to have a conversation.


In [138]:
# apply our emotion model
def add_emotion(row):
    """Return `row` extended with the emotion prediction for its `text` value.

    The notebook-level `model` is scored through `emotion_score`; the keys of
    its result are appended to the row (or overwrite same-named entries).
    """
    enriched = dict(row)
    enriched.update(emotion_score(model, row['text']))
    return pd.Series(enriched)

df = df.apply(add_emotion, axis=1)
df

Unnamed: 0,text,emotion,score
0,im often asked -- and occasionally in an accus...,anger,0.898
1,"and its like, you know, the only ""ist"" i am is...",joy,0.409
2,i dont associate with movements.,fear,0.367
3,"im not an ""ism.""",joy,0.992
4,i just - i think for myself.,joy,0.597
5,the moment when someone attaches to a philosop...,anger,0.573
6,and all the rest of the philosophy that goes w...,anger,0.724
7,a conversation they will assert that they alre...,joy,1.0
8,to know about you because of that association.,joy,0.733
9,and thats not the way to have a conversation.,anger,0.97


In [131]:
df.value_counts('emotion')

emotion
joy         48
anger       30
sadness      4
fear         2
surprise     1
dtype: int64

In [141]:
df_80 = df.query('score > 0.8')
df_80

Unnamed: 0,text,emotion,score
0,im often asked -- and occasionally in an accus...,anger,0.898
3,"im not an ""ism.""",joy,0.992
7,a conversation they will assert that they alre...,joy,1.0
9,and thats not the way to have a conversation.,anger,0.97
10,im sorry.,joy,0.966
13,"and assert, you know, whats going to happen in...",joy,0.909
19,supported.,joy,0.981
21,okay.,joy,0.981
22,im constantly claimed by atheists.,fear,0.802
23,i find this intriguing.,joy,0.993


In [142]:
df_80.value_counts('emotion')

emotion
joy         20
anger        5
surprise     1
fear         1
dtype: int64