In [1]:
#import sample csv
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
import re
from sklearn import preprocessing

data = pd.read_csv('tweets/sample1.csv')
data.head()

Unnamed: 0,screen_name,created_at,location,state_abb,state,source,hashtags,text
0,munn_dayle,2020-04-21 22:24:59,"Georgia, USA",GA,Georgia,Twitter for Android,,Coronavirus Panel Recommends Against Use Of Hy...
1,blake31,2020-04-21 22:24:59,"Clemson, SC",SC,South Carolina,Twitter for iPhone,,I swear once all this covid stuff is over I’m ...
2,thatsopheven,2020-04-21 22:24:59,"Detroit, MI",MI,Michigan,Twitter for iPhone,,I will retweet any information about injustice...
3,BugKlr,2020-04-21 22:24:59,"West Virginia, USA",WV,West Virginia,Twitter for Android,,No Police Raids? Syracuse Mosque Continues Hol...
4,TrewStories,2020-04-21 22:24:59,"Maine, USA",ME,Maine,Twitter Web App,,If half of all US COVID-19 deaths were in Wyom...


In [2]:
data = data.drop('screen_name', axis = 1)
data.head()

Unnamed: 0,created_at,location,state_abb,state,source,hashtags,text
0,2020-04-21 22:24:59,"Georgia, USA",GA,Georgia,Twitter for Android,,Coronavirus Panel Recommends Against Use Of Hy...
1,2020-04-21 22:24:59,"Clemson, SC",SC,South Carolina,Twitter for iPhone,,I swear once all this covid stuff is over I’m ...
2,2020-04-21 22:24:59,"Detroit, MI",MI,Michigan,Twitter for iPhone,,I will retweet any information about injustice...
3,2020-04-21 22:24:59,"West Virginia, USA",WV,West Virginia,Twitter for Android,,No Police Raids? Syracuse Mosque Continues Hol...
4,2020-04-21 22:24:59,"Maine, USA",ME,Maine,Twitter Web App,,If half of all US COVID-19 deaths were in Wyom...


In [3]:
#remove the columns other than text
data = data.drop(['created_at','location','state_abb','state','source','hashtags'], axis=1)
data.head()

Unnamed: 0,text
0,Coronavirus Panel Recommends Against Use Of Hy...
1,I swear once all this covid stuff is over I’m ...
2,I will retweet any information about injustice...
3,No Police Raids? Syracuse Mosque Continues Hol...
4,If half of all US COVID-19 deaths were in Wyom...


In [4]:
#making all text lowercase
data['text'] = data['text'].apply(lambda x: " ".join(x.lower() for x in x.split()))
data.head()

Unnamed: 0,text
0,coronavirus panel recommends against use of hy...
1,i swear once all this covid stuff is over i’m ...
2,i will retweet any information about injustice...
3,no police raids? syracuse mosque continues hol...
4,if half of all us covid-19 deaths were in wyom...


In [5]:
# removing punctuation & symbols 
data['text'] = data['text'].str.replace('[^\w\s]',' ')
data.head()

Unnamed: 0,text
0,coronavirus panel recommends against use of hy...
1,i swear once all this covid stuff is over i m ...
2,i will retweet any information about injustice...
3,no police raids syracuse mosque continues hol...
4,if half of all us covid 19 deaths were in wyom...


In [6]:
# removing stop words using NLTK
stop = stopwords.words('english')
data['text'] = data['text'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
data.head()

Unnamed: 0,text
0,coronavirus panel recommends use hydroxychloro...
1,swear covid stuff going back church wanted pas...
2,retweet information injustice toward black peo...
3,police raids syracuse mosque continues holding...
4,half us covid 19 deaths wyoming would nyc will...


In [7]:
# loading emotion words for reference
angry = "aggravation, annoyance, exasperation, irritation, vexation, acrimoniousness, acrimony, animosity, antagonism, antipathy, bile, biliousness, bitterness, contempt, embitterment, empoisonment, enmity, grudge, hostility, rancor, envy, jaundice, jealousy, pique, resentment, malevolence, malice, spite, vengefulness, venom, vindictiveness, virulence, vitriol, belligerence, contentiousness, contrariness, crankiness, disputatiousness, hot-headedness, irascibility, irascibleness, irritability, orneriness, pugnaciousness, pugnacity, quarrelsomeness, querulousness, blowup, flare, flare-up, outburst chafe, dander, dudgeon, huff, pet, rise, ruffle, temper, air rage, road rage, delirium, heat, passion, warmth, sociopath, injustice, racist, raids"
angry_words = angry.split(', ')
print(len(angry_words))
sad = "bad, blue, brokenhearted, cast down, crestfallen, dejected, depressed, despondent, death, disconsolate, doleful, down, downcast, downhearted, down in the mouth, droopy, forlorn, gloomy, glum, hangdog, heartbroken, heartsick, heartsore, heavyhearted, inconsolable, joyless, low, low-spirited, melancholic, melancholy, miserable, mournful, saddened, sorrowful, sorry, unhappy, woebegone, woeful, wretched, aggrieved, distressed, troubled, uneasy, unquiet, upset, worried, despairing, hopeless, sunk, disappointed, discouraged, disheartened, dispirited, suicidal, dolorous, lachrymose, lugubrious, plaintive, tearful, regretful, rueful, agonized, anguished, grieving, wailing, weeping, black, bleak, cheerless, comfortless, dark, darkening, depressing, desolate, dismal, drear, dreary, elegiac, funereal, gray, morbid, morose, murky, saturnine, somber, sullen"
sad_words = sad.split(', ')
print(len(sad_words))
frustrated = "barred, blocked, clogged, encumbered, fettered, hampered, handicapped, held back, hindered, hobbled, impeded, inhibited, interfered, manacled, obstructed, shackled, tied up, trammeled, arrested, checked, halted, set back, short circuited, stalled, stopped, averted, forestalled, obviated, precluded, prevented, negated, neutralized, nullified, counteracted, offset, conquered, defeated, overcame, displeased, distressed, disturbed, perturbed, troubled, upset, angered, angry, indignant, inflamed, enflamed, infuriated, irate, ireful, mad, outraged, rankled, riled, roiled, shirty, sore, steaming, bristly, cross, disapproving, huffy, piqued, resentful, bearish, bilious, cantankerous, choleric, churlish, crabby, cranky, dyspeptic, fretful, fussy, grouchy, grumpy, ill humored, irascible, irritable, peevish, petulant, snappish, snuffy, testy, touchy, badgered, bedeviled, frustrated, haggled, harassed, harried, hassled, inconvenienced, persecuted, pestered, pinpricked, plagued, provoked, tested, tormented, tortured, tired"
frustrated_words = frustrated.split(', ')
print(len(frustrated_words))
happy = "content, delighted, rejoice, happy, blissful , prosper, pleasure, glad, grateful, proud, joy, smile, hope, justice, positivity, positive, purpose, love, benevolent, cheer, ecstasy, gladly, gleeful, paradise, playful, church, prayer, miracle, relief, help, thanks, support, heroes, academy, accept, achieve, record, victory, win, recover, reduce, increase, happy"
happy_words = happy.split(', ')
print(len(happy_words))

68
86
104
43


In [8]:
def check_emotion(clean_tweet):
    tempdict = {
    'happy' : 0,
    'sad' : 0,
    'angry' : 0,
    'frustrated' : 0}
    current_tweet = clean_tweet.split()
    for word in current_tweet:
        if word in happy_words:
            tempdict['happy'] += 1
        elif word in sad_words:
            tempdict['sad'] += 1 
        elif word in angry_words:
            tempdict['angry'] += 1 
        elif word in frustrated_words:
            tempdict['frustrated'] += 1 
    if max(tempdict.values()) == 0:
        return "neutral"
    else:
        emotion = max(tempdict, key = tempdict.get)
        return emotion

In [9]:
# determing emotion using check_emotion function
data['emotion'] = data['text'].apply(check_emotion)
data.head()

Unnamed: 0,text,emotion
0,coronavirus panel recommends use hydroxychloro...,neutral
1,swear covid stuff going back church wanted pas...,happy
2,retweet information injustice toward black peo...,sad
3,police raids syracuse mosque continues holding...,happy
4,half us covid 19 deaths wyoming would nyc will...,neutral


In [11]:
#Example of check_emotion function working
print("Enter a statement for emotion")
emo = check_emotion(str(input()))
print("The emotion is " + emo)

Enter a statement for emotion
people are getting depressed as death increases
The emotion is sad
