In [1]:
#Make imports
# !pip install fuzzywuzzy
# !pip install python-Levenshtein
# !pip install sklearn
# !pip install pandas
# !pip install numpy
# !pip install multiprocess

import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import string
import sklearn
import numpy as np
import time
from enum import Enum


from multiprocess import Pool, Process
import multiprocessing as mp
# from multiprocessing import Pool, Process


import nltk
from nltk import pos_tag, pos_tag_sents
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK resources this notebook relies on.
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
# English stop words, minus the function words that occur inside keyword
# phrases (e.g. 'stay at home', 'do your part', 'to go', 'take out', 'no cars'),
# so those phrases survive stop-word removal intact.
stop_words = set(stopwords.words('english')).difference(
    {'at', 'do', 'your', 'from', 'to', 'out', 'no', 'the'}
)

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/manikya_varshney/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/manikya_varshney/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [2]:
# Read the combined CSV for the day into `data` and report the wall-clock read time.
path = './output_data/Tue Aug 18/combined_Tue Aug 18.csv'
start = time.time()
data = pd.read_csv(path, header=0, index_col=None, engine='python')
end = time.time()
print("Read csv with pandas: ", end - start, "sec")

Read csv with pandas:  7.500194311141968 sec


In [3]:
# Raw keyword phrases, one list per facet. The list-name suffix matches the
# short code of the corresponding Facets member ("Imp", "Ada", "Ne", ...).
# Case, '#' and other punctuation are irrelevant here because
# keywords_cleaning() lower-cases the entries and strips punctuation.

# Implementation facet.
# NOTE(review): '#quarantineacitivites' looks like a misspelling of
# '#quarantineactivities' -- confirm before changing, since it alters matches.
# NOTE(review): 'isolat' is presumably a deliberate prefix (isolate/isolation) -- confirm.
keywords_Imp = ['stay at home' , 'do your part', 'Responsible', 
            'home', 'house', 'cancel', 'shutdown', 'postpone',
            'school closure', 'Closure', 'business closure',
            'suspension', 'quarantine', 'lockdown', 'social distance', 
            'social distancing', 'self quarantine', 'isolat', '6-feet',
            'distance', '#clubquarantine', '#quarantinelife', '#quarantineacitivites']

# Adaptation facet.
keywords_Ada = ['school from home' , 'learn', 'remote', 'school food service', 
            'online shopping', 'online purchase', 'online church', 'delivery',
            'drive thru', 'to go', 'take out', 'Tiktok', 'Netflix', 'telework', 
            'zoom', 'telehealth', 'telemedicine', 'work from home', 'wfh',
            'working at home', 'working remotely', 'online meeting']

# Negative-emotions facet.
keywords_Ne = ['bored' , 'lonely', 'stress', 
            'anxiety', 'scared', 'worry', 'end', 'cabin fever',
            '#sideeffectsofquarantinelife', 'tissue paper', 'toilet paper']

# Social-disruption facet.
keywords_Sd = ['social functions' , 'gathering', 'empty streets', 
            'interaction', 'large', 'no cars', 'non-essential',
            'travel', 'unnecessary', 'crowd']

# Purpose facet.
keywords_Purp = ['Flatten the curve' , 'Slow the spread', 'slow transmission', 
            'protect', 'save', '#stayhomesavelives']

# Positive-emotion facet.
keywords_Pe = ['silver lining' , 'optimistic', 'hope', 
            'bright side', 'Safe', '#togetherapart']

In [4]:
# Constants
# Name of the column holding the tweet text that keyword matching runs against.
FINAL_COL_NAME = "FINAL_TEXT"
# CPU count reported by the OS (not referenced by the visible cells below).
num_cores = mp.cpu_count()

In [5]:
def keywords_cleaning(keywords_list):
    """Normalise keyword phrases before they are used for fuzzy matching.

    Each keyword is lower-cased, stripped of ASCII punctuation, reduced to
    ASCII letters, digits and single spaces, filtered against ``stop_words``
    and finally Porter-stemmed word by word.

    Args:
        keywords_list: Iterable of keyword strings. It is NOT modified;
            a new list is built instead.

    Returns:
        list[str]: one cleaned, stemmed phrase per input keyword, in input order.
    """
    allowed_chars = set(string.ascii_lowercase + string.digits + ' ')
    punctuation_table = str.maketrans('', '', string.punctuation)
    stemmer = PorterStemmer()

    cleaned_keywords = []
    for keyword in keywords_list:
        text = keyword.lower().translate(punctuation_table)
        # Collapse newlines/tabs/runs of blanks into single spaces first so
        # that word boundaries survive the character filter below.
        text = ' '.join(text.split())
        # str.replace() matches literally, so a regex-looking pattern such as
        # '/[^a-zA-Z0-9 ]/g' never removes anything. Filter characters
        # explicitly instead; this also drops curly quotes and any other
        # non-ASCII residue.
        text = ''.join(ch for ch in text if ch in allowed_chars)
        kept_words = [word for word in text.split() if word not in stop_words]
        cleaned_keywords.append(' '.join(stemmer.stem(word) for word in kept_words))

    return cleaned_keywords


In [6]:
# Enum for facets
class Facets(Enum):
    """Facet categories a tweet can be tagged with.

    Each member's value is a short code; ``keep_only_highest`` writes
    ``facet.value`` into the ``FACET`` column of its result.
    """
    # The codes mirror the suffixes of the keywords_<code> lists.
    IMPLEMENTATION = "Imp"
    ADAPTATION = "Ada"
    NEGATIVE_EMOTIONS = "Ne"
    SOCIAL_DISRUPTION = "Sd"
    PURPOSE = "Purp"
    POSITIVE_EMOTION = "Pe"

In [7]:
def fuzzy_logic(row, FINAL_COL_NAME, keywords, facet):
    """Annotate one row with its best-scoring keyword.

    Args:
        row: Row to annotate (a pandas Series when called via ``DataFrame.apply``).
        FINAL_COL_NAME: Key of the text field inside ``row``.
        keywords: Candidate keyword strings to match against the text.
        facet: Accepted for call compatibility; not used by this function.

    Returns:
        The same ``row`` with ``final_score`` and ``final_keyword_match`` set.
    """
    text = row[FINAL_COL_NAME]
    best_keyword, best_score = process.extractOne(
        text, keywords, scorer=fuzz.partial_ratio
    )
    # Keep this assignment order: it fixes the order of the new columns.
    row['final_score'] = best_score
    row['final_keyword_match'] = best_keyword
    return row

In [8]:
def keep_only_highest(data, high_value, facet):
    """Keep the rows whose score equals ``high_value`` and tag them with a facet.

    Args:
        data: DataFrame with a ``final_score`` column. It is left unmodified.
        high_value: Score a row must have (after casting to int) to be kept.
            Previously this argument was ignored in favour of a hard-coded 100.
        facet: Object exposing ``.value`` (e.g. a ``Facets`` member); the value
            is written to the ``FACET`` column of every kept row.

    Returns:
        A new DataFrame containing only the matching rows, re-indexed from 0,
        with an integer ``final_score`` column and an added ``FACET`` column.
    """
    # Cast on a separate Series so the caller's frame is not altered.
    scores = data['final_score'].astype(int)
    kept = data.loc[scores == high_value].reset_index(drop=True)
    kept['final_score'] = kept['final_score'].astype(int)
    kept['FACET'] = facet.value
    return kept

In [9]:
def proportion(interim_data_final, interim_data):
    """Return the row count of ``interim_data_final`` divided by that of ``interim_data``.

    Args:
        interim_data_final: Object with a ``shape`` attribute (the filtered frame).
        interim_data: Object with a ``shape`` attribute (the unfiltered frame).

    Returns:
        float: ``interim_data_final.shape[0] / interim_data.shape[0]``.

    Raises:
        ZeroDivisionError: if ``interim_data`` has no rows.
    """
    return interim_data_final.shape[0] / interim_data.shape[0]

In [10]:
def _apply_df(args):
    """Worker entry point: run ``fuzzy_logic`` over every row of one chunk.

    Args:
        args: ``(df, kwargs)`` pair, where ``df`` is a DataFrame chunk and
            ``kwargs`` is a dict containing ``'col_name'``, ``'keywords'``
            and ``'facet'``.

    Returns:
        The result of ``df.apply(fuzzy_logic, axis=1, ...)`` for the chunk.

    Raises:
        KeyError: if one of the required keys is missing from ``kwargs``.
    """
    df, kwargs = args
    # Read instead of pop: the caller pairs the SAME dict with every chunk, so
    # popping would empty it for the next chunk whenever the dict is not
    # copied between calls (e.g. when run without a process pool).
    col_name = kwargs['col_name']
    keywords = kwargs['keywords']
    facet = kwargs['facet']
    return df.apply(fuzzy_logic, axis=1, args=(col_name, keywords, facet))

In [11]:
def test_m(df, **kwargs):
    """Apply ``_apply_df`` to ``df`` in parallel and stitch the pieces back together.

    Args:
        df: DataFrame to process; it is split row-wise into one chunk per worker.
        **kwargs: Passed unchanged to ``_apply_df`` alongside every chunk
            (``col_name``, ``keywords`` and ``facet`` are read there).

    Returns:
        The per-chunk results concatenated in chunk order.
    """
    # Fixed pool size. NOTE(review): the notebook also computes `num_cores`
    # but it is not used here -- confirm whether 8 is intentional.
    workers = 8
    chunks = np.array_split(df, workers)
    # The context manager tears the worker processes down even when a chunk
    # raises, instead of leaving a closed-but-never-joined pool behind.
    with Pool(processes=workers) as pool:
        results = pool.map(_apply_df, [(chunk, kwargs) for chunk in chunks])
    return pd.concat(results)

### Keywords Cleaning

In [12]:
# Run every facet's keyword list through the same cleaning pipeline and
# rebind each name to its cleaned version.
(keywords_Imp, keywords_Ada, keywords_Ne,
 keywords_Sd, keywords_Purp, keywords_Pe) = [
    keywords_cleaning(raw_keywords)
    for raw_keywords in (keywords_Imp, keywords_Ada, keywords_Ne,
                         keywords_Sd, keywords_Purp, keywords_Pe)
]

### Drop Rows with Empty Tweets

In [13]:
# Treat empty strings as missing, then drop every row without tweet text.
# The results are assigned back rather than using inplace methods on a column
# selection: `data[col].replace(..., inplace=True)` is chained assignment,
# which warns on recent pandas and only modifies a temporary under Copy-on-Write.
nan_value = float("NaN")
data[FINAL_COL_NAME] = data[FINAL_COL_NAME].replace("", nan_value)
data = data.dropna(subset=[FINAL_COL_NAME])

# data = data.head(100)

# 1_Implementation

In [14]:
# Score every tweet against the Implementation keywords in parallel, then keep
# the rows with a score of 100 and tag them with the facet code.
start = time.time()
interim_Imp = test_m(
    data,
    col_name=FINAL_COL_NAME,
    keywords=keywords_Imp,
    facet=Facets.IMPLEMENTATION.value,
)
interim_Imp_final = keep_only_highest(interim_Imp, 100, Facets.IMPLEMENTATION)
print("time", time.time() - start)

time 72.69238066673279


In [15]:
interim_Imp_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score,final_keyword_match,FACET
0,Tue Aug 18 19:46:26 +0000 2020,1.295809300645007e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.2957933158840197e+18,67175426.0,FletchersDogs,en,0.0,...,,,,,Y,False,fletchersdog true look like commun wont the fu...,100,social distanc,Imp
1,Tue Aug 18 19:46:27 +0000 2020,1.2958093013327951e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,False,drink your water pray moistur your skin drink ...,100,home,Imp
2,Tue Aug 18 19:46:28 +0000 2020,1.2958093074018345e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.295793161437098e+18,64016042.0,iamdimplekaul,en,0.0,...,,,,,Y,False,iamdimplekaul gembing watch quit themesp mandi...,100,lockdown,Imp
3,Tue Aug 18 19:46:28 +0000 2020,1.2958093093563843e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,False,warn what come veteran california busi owner f...,100,lockdown,Imp
4,Tue Aug 18 19:46:29 +0000 2020,1.2958093106775447e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.2958053907944284e+18,1.1635522593991475e+18,btspopmp3,en,0.0,...,,,,,Y,False,btspopmp3 bt lockdown bwl era the dynamit teas...,100,lockdown,Imp
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21121,Tue Aug 18 20:16:14 +0000 2020,1.2958167991040123e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,True,break sampp 500 close at record high eras the ...,100,shutdown,Imp
21122,Tue Aug 18 20:16:14 +0000 2020,1.2958167996451267e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,Y,True,45 out 50 us governor forc covid19 infect pati...,100,home,Imp
21123,Tue Aug 18 20:16:14 +0000 2020,1.2958167997082173e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,True,cant believ china parti america still strict l...,100,lockdown,Imp
21124,Tue Aug 18 20:16:15 +0000 2020,1.2958168013146276e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,True,who still social distanc wear mask limit conta...,100,social distanc,Imp


# 2_Adaptation

In [16]:
# Score every tweet against the Adaptation keywords in parallel, then keep
# the rows with a score of 100 and tag them with the facet code.
start = time.time()
interim_Ada = test_m(
    data,
    col_name=FINAL_COL_NAME,
    keywords=keywords_Ada,
    facet=Facets.ADAPTATION.value,
)
interim_Ada_final = keep_only_highest(interim_Ada, 100, Facets.ADAPTATION)
print("end", time.time() - start)

end 69.71007061004639


In [17]:
interim_Ada_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score,final_keyword_match,FACET
0,Tue Aug 18 19:46:28 +0000 2020,1.295809307771048e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,False,boypussi tight he behav covid natur asian puss...,100,to go,Ada
1,Tue Aug 18 19:46:32 +0000 2020,1.2958093241833595e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,False,eat vat homemad hummu clear pictur work from home,100,work from home,Ada
2,Tue Aug 18 19:46:37 +0000 2020,1.2958093438295327e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,False,therapist obsess think covid fake tell to go o...,100,to go,Ada
3,Tue Aug 18 19:46:37 +0000 2020,1.2958093449074156e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,False,2020 year adapt learn ecologist assist profess...,100,learn,Ada
4,Tue Aug 18 19:46:38 +0000 2020,1.295809348082512e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,1.2956844342687703e+18,1915531928.0,missalys,en,0.0,...,,,,,Y,False,missali airbnb know lot friend go to portland ...,100,work from home,Ada
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3157,Tue Aug 18 20:15:59 +0000 2020,1.295816736269312e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,True,the peopl test posit to told to go home right ...,100,to go,Ada
3158,Tue Aug 18 20:16:05 +0000 2020,1.2958167591702897e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,True,dont forget financ peopl at home plug 56 numbe...,100,work from home,Ada
3159,Tue Aug 18 20:16:06 +0000 2020,1.2958167669003796e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,True,txt unlearn social distanc find peac,100,learn,Ada
3160,Tue Aug 18 20:16:08 +0000 2020,1.2958167751715471e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,Y,True,txt unlearn social distanc find peac,100,learn,Ada


# 3_Negative Emotion

In [18]:
# Score every tweet against the Negative Emotions keywords in parallel, then
# keep the rows with a score of 100 and tag them with the facet code.
start = time.time()
interim_Ne = test_m(
    data,
    col_name=FINAL_COL_NAME,
    keywords=keywords_Ne,
    facet=Facets.NEGATIVE_EMOTIONS.value,
)
interim_Ne_final = keep_only_highest(interim_Ne, 100, Facets.NEGATIVE_EMOTIONS)
print("time", time.time() - start)

time 62.930540323257446


In [19]:
interim_Ne_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score,final_keyword_match,FACET
0,Tue Aug 18 19:46:27 +0000 2020,1.295809305120313e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,1.2955693930496369e+18,7.623897950372947e+17,TheRightMelissa,en,0.0,...,,,,,Y,False,therightmelissa imagin ignor peopl like forget...,100,worri,Ne
1,Tue Aug 18 19:46:31 +0000 2020,1.295809319729074e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.295808370843279e+18,9.32391502109692e+17,ajgomesta,en,0.0,...,,,,,Y,False,ajgomesta oh do houston the 4th largest citi t...,100,end,Ne
2,Tue Aug 18 19:46:32 +0000 2020,1.2958093248545219e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,False,gain weight quarantin gain lost gain weight ba...,100,end,Ne
3,Tue Aug 18 19:46:33 +0000 2020,1.2958093302442107e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,False,good listen might help center your anxieti do ...,100,anxieti,Ne
4,Tue Aug 18 19:46:36 +0000 2020,1.2958093402852923e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,False,thousand peopl attend the hoha water electr mu...,100,end,Ne
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128,Tue Aug 18 20:16:08 +0000 2020,1.2958167748611645e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,True,chines covid19 vaccin contend made stateown ph...,100,end,Ne
5129,Tue Aug 18 20:16:09 +0000 2020,1.2958167753435177e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,True,your scare covid im scare biden presid the,100,scare,Ne
5130,Tue Aug 18 20:16:10 +0000 2020,1.2958167820334203e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,Y,True,1 reflect friend the uk the second wave come s...,100,end,Ne
5131,Tue Aug 18 20:16:10 +0000 2020,1.2958167823227658e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,True,your scare covid im scare biden presid the,100,scare,Ne


# 4_Social Disruption

In [20]:
# Score every tweet against the Social Disruption keywords in parallel, then
# keep the rows with a score of 100 and tag them with the facet code.
start = time.time()
interim_Sd = test_m(
    data,
    col_name=FINAL_COL_NAME,
    keywords=keywords_Sd,
    facet=Facets.SOCIAL_DISRUPTION.value,
)
interim_Sd_final = keep_only_highest(interim_Sd, 100, Facets.SOCIAL_DISRUPTION)
print("end", time.time() - start)

end 61.86441111564636


In [21]:
interim_Sd_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score,final_keyword_match,FACET
0,Tue Aug 18 19:46:31 +0000 2020,1.295809319729074e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.295808370843279e+18,9.32391502109692e+17,ajgomesta,en,0.0,...,,,,,Y,False,ajgomesta oh do houston the 4th largest citi t...,100,larg,Sd
1,Tue Aug 18 19:46:31 +0000 2020,1.2958093219896238e+18,"<a href=""https://about.twitter.com/products/tw...",False,,1.295714621614162e+18,2347049341.0,voxdotcom,en,0.0,...,,,,,Y,False,voxdotcom presid trump took unpreced step resp...,100,travel,Sd
2,Tue Aug 18 19:46:40 +0000 2020,1.2958093591259955e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.2958073087153152e+18,2853461537.0,ScottAdamsSays,en,0.0,...,,,,,Y,False,scottadamssay sweden also larg govern run nurs...,100,larg,Sd
3,Tue Aug 18 19:46:42 +0000 2020,1.295809364838609e+18,"<a href=""http://instagram.com"" rel=""nofollow"">...",False,,,,,en,0.0,...,,,,,Y,False,the last month weve made consum mostli older s...,100,larg,Sd
4,Tue Aug 18 19:46:50 +0000 2020,1.2958093999534694e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.2958070791779123e+18,7.145074163820134e+17,EmilyRussellADK,en,0.0,...,,,,,Y,False,emilyrusselladk seem like cover given the pand...,100,travel,Sd
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2560,Tue Aug 18 20:15:57 +0000 2020,1.2958167265048044e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,True,corona the largest scam ever perpetr peopl die...,100,larg,Sd
2561,Tue Aug 18 20:15:58 +0000 2020,1.295816730430718e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,True,guy today wrote upcatet examin let tell one th...,100,crowd,Sd
2562,Tue Aug 18 20:16:03 +0000 2020,1.2958167502026424e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,True,cnntravel ground zero the coronaviru pandem wu...,100,travel,Sd
2563,Tue Aug 18 20:16:04 +0000 2020,1.2958167548122153e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,Y,True,guy today wrote upcatet examin let tell one th...,100,crowd,Sd


# 5_Purpose

In [22]:
# Score every tweet against the Purpose keywords in parallel, then keep
# the rows with a score of 100 and tag them with the facet code.
start = time.time()
interim_Purp = test_m(
    data,
    col_name=FINAL_COL_NAME,
    keywords=keywords_Purp,
    facet=Facets.PURPOSE.value,
)
interim_Purp_final = keep_only_highest(interim_Purp, 100, Facets.PURPOSE)
print("end", time.time() - start)

end 57.57288432121277


In [23]:
interim_Purp_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score,final_keyword_match,FACET
0,Tue Aug 18 19:46:32 +0000 2020,1.2958093239904827e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,False,cloth face mask give 10 protect no mask at giv...,100,protect,Purp
1,Tue Aug 18 19:46:34 +0000 2020,1.2958093345181286e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.2958031205100012e+18,20098015.0,davidschneider,en,0.0,...,,,,,Y,False,davidschneid tori blood hand alreadi know half...,100,protect,Purp
2,Tue Aug 18 19:46:35 +0000 2020,1.295809336279806e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,False,save live the right thing the measur even lack...,100,save,Purp
3,Tue Aug 18 19:47:00 +0000 2020,1.295809442206888e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,False,wed like to thank oper team hard work the past...,100,protect,Purp
4,Tue Aug 18 19:47:05 +0000 2020,1.295809462750642e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,False,im proud to wear mask protect peopl help preve...,100,protect,Purp
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2048,Tue Aug 18 20:16:07 +0000 2020,1.295816768502608e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,True,presid deliv arizona the pandem the paycheck p...,100,save,Purp
2049,Tue Aug 18 20:16:09 +0000 2020,1.295816775477715e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,Y,True,compar the ubiquit flatten the curv graph the ...,100,flatten the curv,Purp
2050,Tue Aug 18 20:16:11 +0000 2020,1.2958167854726963e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,True,presid deliv arizona the pandem the paycheck p...,100,save,Purp
2051,Tue Aug 18 20:16:12 +0000 2020,1.2958167894657393e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,Y,True,the coronaviru task forc remain commit to prot...,100,protect,Purp


# 6_Positive Emotion

In [24]:
# Score every tweet against the Positive Emotion keywords in parallel, then
# keep the rows with a score of 100 and tag them with the facet code.
start = time.time()
interim_Pe = test_m(
    data,
    col_name=FINAL_COL_NAME,
    keywords=keywords_Pe,
    facet=Facets.POSITIVE_EMOTION.value,
)
interim_Pe_final = keep_only_highest(interim_Pe, 100, Facets.POSITIVE_EMOTION)
print("time", time.time() - start)

time 58.37020659446716


In [25]:
interim_Pe_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score,final_keyword_match,FACET
0,Tue Aug 18 19:46:32 +0000 2020,1.295809323398918e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.2958093226019676e+18,175263641.0,petersankoff,en,0.0,...,,,,,Y,False,precis hope start the project hard to get foot...,100,hope,Pe
1,Tue Aug 18 19:46:32 +0000 2020,1.295809324812505e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.29567571866436e+18,7.122611627637555e+17,AddressingLife,en,0.0,...,,,,,Y,False,addressinglif got the guidelin govt sent howev...,100,safe,Pe
2,Tue Aug 18 19:46:33 +0000 2020,1.295809326846808e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,389683366.0,networkforphl,en,0.0,...,,,,,Y,False,networkforphl new report offer polici recommen...,100,safe,Pe
3,Tue Aug 18 19:46:36 +0000 2020,1.2958093408012452e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,False,one hand the lift lockdown meant danger peopl ...,100,safe,Pe
4,Tue Aug 18 19:46:42 +0000 2020,1.295809366378127e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,29447794.0,uvmvermont,en,0.0,...,,,,,Y,False,uvmvermont the citi burlington work togeth pla...,100,safe,Pe
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2147,Tue Aug 18 20:15:52 +0000 2020,1.2958167066615726e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,Y,True,postponejeeandneet everyth plan accord to the ...,100,safe,Pe
2148,Tue Aug 18 20:16:02 +0000 2020,1.2958167467886305e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,True,the term brand safeti taken liter mean the pandem,100,safe,Pe
2149,Tue Aug 18 20:16:05 +0000 2020,1.295816760801796e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,Y,True,hungarian univers must provid the follow safet...,100,safe,Pe
2150,Tue Aug 18 20:16:06 +0000 2020,1.2958167636623073e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,Y,True,croatia safe binat coupl separ eu coronaviru e...,100,safe,Pe


### All Proportions

In [26]:
# For each facet, report the share of scored tweets that were kept.
for label, kept_rows, scored_rows in (
    ("For IMPLEMENTATION:", interim_Imp_final, interim_Imp),
    ("For ADAPTATION:", interim_Ada_final, interim_Ada),
    ("For NEGATIVE EMOTIONS:", interim_Ne_final, interim_Ne),
    ("For SOCIAL DISRUPTION:", interim_Sd_final, interim_Sd),
    ("For PURPOSE:", interim_Purp_final, interim_Purp),
    ("For POSITIVE EMOTION:", interim_Pe_final, interim_Pe),
):
    print(label, proportion(kept_rows, scored_rows))

For IMPLEMENTATION: 0.2000416635103401
For ADAPTATION: 0.029940913567154004
For NEGATIVE EMOTIONS: 0.048604272403605785
For SOCIAL DISRUPTION: 0.02428793273236876
For PURPOSE: 0.019439815165517765
For POSITIVE EMOTION: 0.020377244148170593


In [27]:
# Stack the six per-facet result frames into one frame with a fresh 0..n-1 index.
merged_data = pd.concat(
    [
        interim_Imp_final,
        interim_Ada_final,
        interim_Ne_final,
        interim_Sd_final,
        interim_Purp_final,
        interim_Pe_final,
    ],
    ignore_index=True,
)

In [28]:
# One indicator column per facet code (FACET_Ada, FACET_Imp, ...).
split = pd.get_dummies(merged_data['FACET'], prefix='FACET')

In [29]:
# Attach the facet indicator columns row-by-row (both sides are re-indexed
# from 0 so the index-on-index merge lines them up positionally).
# NOTE: re-running this cell merges the indicator columns a second time.
merged_data = pd.merge(
    merged_data.reset_index(drop=True),
    split.reset_index(drop=True),
    left_index=True,
    right_index=True,
)

In [30]:
merged_data

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,FINAL_TEXT,final_score,final_keyword_match,FACET,FACET_Ada,FACET_Imp,FACET_Ne,FACET_Pe,FACET_Purp,FACET_Sd
0,Tue Aug 18 19:46:26 +0000 2020,1.295809300645007e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.2957933158840197e+18,67175426.0,FletchersDogs,en,0.0,...,fletchersdog true look like commun wont the fu...,100,social distanc,Imp,0,1,0,0,0,0
1,Tue Aug 18 19:46:27 +0000 2020,1.2958093013327951e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,drink your water pray moistur your skin drink ...,100,home,Imp,0,1,0,0,0,0
2,Tue Aug 18 19:46:28 +0000 2020,1.2958093074018345e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.295793161437098e+18,64016042.0,iamdimplekaul,en,0.0,...,iamdimplekaul gembing watch quit themesp mandi...,100,lockdown,Imp,0,1,0,0,0,0
3,Tue Aug 18 19:46:28 +0000 2020,1.2958093093563843e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,warn what come veteran california busi owner f...,100,lockdown,Imp,0,1,0,0,0,0
4,Tue Aug 18 19:46:29 +0000 2020,1.2958093106775447e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.2958053907944284e+18,1.1635522593991475e+18,btspopmp3,en,0.0,...,btspopmp3 bt lockdown bwl era the dynamit teas...,100,lockdown,Imp,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36186,Tue Aug 18 20:15:52 +0000 2020,1.2958167066615726e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,postponejeeandneet everyth plan accord to the ...,100,safe,Pe,0,0,0,1,0,0
36187,Tue Aug 18 20:16:02 +0000 2020,1.2958167467886305e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,the term brand safeti taken liter mean the pandem,100,safe,Pe,0,0,0,1,0,0
36188,Tue Aug 18 20:16:05 +0000 2020,1.295816760801796e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,hungarian univers must provid the follow safet...,100,safe,Pe,0,0,0,1,0,0
36189,Tue Aug 18 20:16:06 +0000 2020,1.2958167636623073e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,croatia safe binat coupl separ eu coronaviru e...,100,safe,Pe,0,0,0,1,0,0
