In [1]:
#Make imports
# !pip install fuzzywuzzy
# !pip install python-Levenshtein
# !pip install sklearn
# !pip install pandas
# !pip install numpy

import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import string
import sklearn
import numpy as np
import time
from enum import Enum

!pip install multiprocess
import multiprocess
import multiprocessing as mp
from multiprocessing import Pool, Process


import nltk
from nltk import pos_tag, pos_tag_sents
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
# Make sure the NLTK resources used by this notebook are available locally.
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

# English stop words, except the short function words that appear inside the
# tracked keyword phrases (e.g. 'stay at home', 'do your part', 'to go',
# 'take out', 'no cars', 'flatten the curve'); those must survive cleaning.
stop_words = {
    word
    for word in stopwords.words('english')
    if word not in {'at', 'do', 'your', 'from', 'to', 'out', 'no', 'the'}
}

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


[nltk_data] Downloading package stopwords to
[nltk_data]     /home/manikya_varshney/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/manikya_varshney/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [2]:
# Load the combined tweet export into `data` and report how long the read took.
path = '/home/manikya_varshney/Documents/Python/Yale/Latest/output_data/Tue Aug 18/combined_Tue Aug 18.csv'

start = time.time()
data = pd.read_csv(path, header=0, index_col=None, engine='python')
end = time.time()

print("Read csv with pandas: ", (end - start), "sec")

Read csv with pandas:  6.971632719039917 sec


In [3]:
# Raw keyword lists, one per facet. They are normalised later by
# keywords_cleaning() (lower-casing, punctuation removal, stop-word removal,
# stemming), so capitalisation and '#' / '-' here are cosmetic.
#
# NOTE(review): matching uses fuzz.partial_ratio, so very short keywords such
# as 'end', 'home' or 'large' presumably also hit longer words that contain
# them (e.g. 'weekend') -- confirm this is intended.

# Implementation of distancing measures.
keywords_Imp = [
    'stay at home', 'do your part', 'Responsible',
    'home', 'house', 'cancel', 'shutdown', 'postpone',
    'school closure', 'Closure', 'business closure',
    'suspension', 'quarantine', 'lockdown', 'social distance',
    'social distancing', 'self quarantine', 'isolat', '6-feet',
    'distance', '#clubquarantine', '#quarantinelife',
    # Spelling corrected from '#quarantineacitivites', which could never match.
    '#quarantineactivities',
]

# Adaptation to distancing (remote work / school / services).
keywords_Ada = [
    'school from home', 'learn', 'remote', 'school food service',
    'online shopping', 'online purchase', 'online church', 'delivery',
    'drive thru', 'to go', 'take out', 'Tiktok', 'Netflix', 'telework',
    'zoom', 'telehealth', 'telemedicine', 'work from home', 'wfh',
    'working at home', 'working remotely', 'online meeting',
]

# Negative emotions.
keywords_Ne = [
    'bored', 'lonely', 'stress',
    'anxiety', 'scared', 'worry', 'end', 'cabin fever',
    '#sideeffectsofquarantinelife', 'tissue paper', 'toilet paper',
]

# Social disruption.
keywords_Sd = [
    'social functions', 'gathering', 'empty streets',
    'interaction', 'large', 'no cars', 'non-essential',
    'travel', 'unnecessary', 'crowd',
]

# Purpose of distancing.
keywords_Purp = [
    'Flatten the curve', 'Slow the spread', 'slow transmission',
    'protect', 'save', '#stayhomesavelives',
]

# Positive emotions.
keywords_Pe = [
    'silver lining', 'optimistic', 'hope',
    'bright side', 'Safe', '#togetherapart',
]

In [5]:
# Constants
# CPU count reported by multiprocessing; multiprocessing_() uses it both as
# the worker-pool size and as the number of DataFrame chunks.
num_cores = mp.cpu_count()
# Name of the DataFrame column whose text is fuzzy-matched against the keywords.
FINAL_COL_NAME = "FINAL_TEXT"

In [6]:
def keywords_cleaning(keywords_list):
    """Normalise raw keyword phrases for fuzzy matching.

    Each keyword is lower-cased, stripped of ASCII punctuation, has newlines
    replaced by spaces and surrounding curly quotes removed, then has stop
    words (module-level ``stop_words``) dropped and every remaining word
    Porter-stemmed.

    Parameters
    ----------
    keywords_list : list of str
        Raw keyword phrases. The list is NOT modified.

    Returns
    -------
    list of str
        A new list with one cleaned phrase per input keyword, in the same
        order. A keyword made up solely of stop words becomes ``''``.
    """
    punctuation_table = str.maketrans('', '', string.punctuation)
    stemmer = PorterStemmer()

    cleaned_keywords = []
    for keyword in keywords_list:
        text = keyword.lower().translate(punctuation_table)
        # The original also called .replace('/[^a-zA-Z0-9 ]/g', ''): a
        # JavaScript regex used as a literal string, which can never match
        # once punctuation is removed, so it is dropped here.
        # string.punctuation is ASCII-only, so curly quotes are stripped
        # explicitly (opening, closing and apostrophe).
        text = text.replace('\n', ' ').strip('“”’')
        # split() with no argument also discards leading/trailing/repeated
        # whitespace, so no separate space-stripping step is needed.
        kept_words = [word for word in text.split() if word not in stop_words]
        cleaned_keywords.append(' '.join(stemmer.stem(word) for word in kept_words))

    return cleaned_keywords


In [7]:
# Enum for facets
class Facets(Enum):
    """Tweet facets scored in this notebook.

    Each member's value is the short suffix that ``fuzzy_logic`` and
    ``keep_only_highest`` format into the per-facet column names
    ``final_score_<value>`` and ``final_keyword_match_<value>``.
    """
    IMPLEMENTATION = "Imp"
    ADAPTATION = "Ada"
    NEGATIVE_EMOTIONS = "Ne"
    SOCIAL_DISRUPTION = "Sd"
    PURPOSE = "Purp"
    POSITIVE_EMOTION = "Pe"

In [8]:
def fuzzy_logic(row, FINAL_COL_NAME, keywords, facet):
    """Find the keyword that best matches one row's text and record it.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series from ``DataFrame.apply(axis=1)``)
        Must contain the text under ``FINAL_COL_NAME``; it is updated in place.
    FINAL_COL_NAME : str
        Name of the text column to match.
    keywords : list of str
        Candidate keywords for this facet.
    facet : Facets
        Its ``value`` is used as the suffix of the two result columns.

    Returns
    -------
    The same ``row``, with ``final_score_<facet>`` (partial-ratio score) and
    ``final_keyword_match_<facet>`` (best-matching keyword) added.
    """
    suffix = facet.value
    best_keyword, best_score = process.extractOne(
        row[FINAL_COL_NAME],
        keywords,
        scorer=fuzz.partial_ratio,
    )
    # Score first, keyword second: this fixes the order of the new columns.
    row['final_score_{}'.format(suffix)] = best_score
    row['final_keyword_match_{}'.format(suffix)] = best_keyword
    return row

In [9]:
def keep_only_highest(data, high_value, facet):
    """Keep only the rows whose facet score equals ``high_value``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``final_score_<facet.value>`` column. It is not
        modified.
    high_value : int
        Score a row must have to be kept (previously ignored in favour of a
        hard-coded 100; existing callers pass 100, so results are unchanged).
    facet : Facets
        Facet whose score column is filtered.

    Returns
    -------
    pandas.DataFrame
        A new frame with the score column cast to ``int``, containing only
        the matching rows, re-indexed from 0.
    """
    score_col = 'final_score_{}'.format(facet.value)
    # assign() returns a copy, so the caller's frame keeps its original dtype.
    scored = data.assign(**{score_col: data[score_col].astype(int)})
    return scored[scored[score_col] == high_value].reset_index(drop=True)

In [10]:
def proportion(interim_data_final, interim_data):
    """Return the share of rows of ``interim_data`` kept in ``interim_data_final``.

    Parameters
    ----------
    interim_data_final : object with ``.shape``
        The filtered frame (numerator: its row count).
    interim_data : object with ``.shape``
        The full scored frame (denominator: its row count).

    Returns
    -------
    float
        ``rows(final) / rows(all)``, or ``nan`` when ``interim_data`` has no
        rows (the ratio is undefined; previously this raised
        ``ZeroDivisionError``).
    """
    numerator = interim_data_final.shape[0]
    denominator = interim_data.shape[0]
    if denominator == 0:
        return float("nan")
    return numerator / denominator

In [11]:
def split_dataframe(df, nums = 4):
    """Split ``df`` into ``nums`` consecutive row chunks that cover every row.

    Chunk sizes differ by at most one row: when ``len(df)`` is not divisible
    by ``nums`` the first ``len(df) % nums`` chunks get one extra row.
    (Previously the remainder rows were silently dropped, and all rows were
    dropped when ``nums > len(df)``.) For evenly divisible inputs the result
    is the same as before.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split by position.
    nums : int, default 4
        Number of chunks to return; must be at least 1.

    Returns
    -------
    list of pandas.DataFrame
        Exactly ``nums`` chunks in original row order (some may be empty when
        ``nums > len(df)``).

    Raises
    ------
    ValueError
        If ``nums`` is smaller than 1.
    """
    if nums < 1:
        raise ValueError("nums must be at least 1")

    base_size, extra_rows = divmod(len(df), nums)
    chunks = []
    start = 0
    for chunk_idx in range(nums):
        stop = start + base_size + (1 if chunk_idx < extra_rows else 0)
        chunks.append(df.iloc[start:stop])
        start = stop
    return chunks

In [12]:
def _fuzzy_logic_chunk(chunk, col_name, keywords, facet):
    """Apply the row-level ``fuzzy_logic`` to every row of one DataFrame chunk."""
    return chunk.apply(fuzzy_logic, axis=1, args=(col_name, keywords, facet))


def multiprocessing_(data_, keywords_, facet_):
    """Score every row of ``data_`` against ``keywords_`` using a worker pool.

    The frame is split into ``num_cores`` chunks with ``split_dataframe``;
    each worker applies ``fuzzy_logic`` row by row to its chunk. (Previously
    the whole chunk was handed to ``fuzzy_logic`` as if it were a single
    row.)

    Parameters
    ----------
    data_ : pandas.DataFrame
        Frame containing the ``FINAL_COL_NAME`` text column.
    keywords_ : list of str
        Cleaned keywords for the facet.
    facet_ : Facets
        Facet being scored.

    Returns
    -------
    pandas.DataFrame
        The scored chunks concatenated in chunk order, i.e. the same kind of
        frame that ``data_.apply(fuzzy_logic, axis=1, ...)`` produces, so it
        can be passed straight to ``keep_only_highest``.
    """
    print(data_.shape)
    start = time.time()
    chunks = split_dataframe(data_, num_cores)
    pool = Pool(num_cores)
    try:
        scored_chunks = pool.starmap(
            _fuzzy_logic_chunk,
            [(chunk, FINAL_COL_NAME, keywords_, facet_) for chunk in chunks],
        )
    finally:
        # Always release the worker processes, even if a worker raised.
        pool.close()
        pool.join()
    end = time.time()
    print("time = ", end - start)
    return pd.concat(scored_chunks)

### Keywords Cleaning

In [13]:
# Replace each raw keyword list with its cleaned (lower-cased, de-punctuated,
# stop-word-free, stemmed) version. Re-running this cell cleans the already
# cleaned lists again.
(keywords_Imp, keywords_Ada, keywords_Ne,
 keywords_Sd, keywords_Purp, keywords_Pe) = [
    keywords_cleaning(raw_keywords)
    for raw_keywords in (keywords_Imp, keywords_Ada, keywords_Ne,
                         keywords_Sd, keywords_Purp, keywords_Pe)
]

### Drop Rows with Empty Tweets

In [14]:
# Treat empty text as missing, then drop every row without text.
# The column is assigned back explicitly: an in-place replace() on the
# selected column is a chained assignment, which pandas does not guarantee
# to propagate to `data` (and never does under Copy-on-Write).
nan_value = float("NaN")
data[FINAL_COL_NAME] = data[FINAL_COL_NAME].replace("", nan_value)
data = data.dropna(subset=[FINAL_COL_NAME])

# 1_Implementation

In [15]:
# Score every tweet against the Implementation keywords (row-wise apply is
# used here; multiprocessing_(data, keywords_Imp, Facets.IMPLEMENTATION) is
# the disabled parallel alternative), then keep only exact (100) matches.
interim_Imp = data.apply(
    lambda row: fuzzy_logic(row, FINAL_COL_NAME, keywords_Imp, Facets.IMPLEMENTATION),
    axis=1,
)
interim_Imp_final = keep_only_highest(data=interim_Imp, high_value=100, facet=Facets.IMPLEMENTATION)

In [16]:
# Display the tweets whose Implementation score is exactly 100.
interim_Imp_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Imp,final_keyword_match_Imp
0,Tue Aug 18 19:46:26 +0000 2020,1.295809300645007e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.2957933158840197e+18,67175426.0,FletchersDogs,en,0.0,...,,,,,,Y,False,fletchersdog true look like commun wont the fu...,100,social distanc
1,Tue Aug 18 19:46:27 +0000 2020,1.2958093013327951e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,False,drink your water pray moistur your skin drink ...,100,home
2,Tue Aug 18 19:46:28 +0000 2020,1.2958093074018345e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.295793161437098e+18,64016042.0,iamdimplekaul,en,0.0,...,,,,,,Y,False,iamdimplekaul gembing watch quit themesp mandi...,100,lockdown
3,Tue Aug 18 19:46:28 +0000 2020,1.2958093093563843e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,False,warn what come veteran california busi owner f...,100,lockdown
4,Tue Aug 18 19:46:29 +0000 2020,1.2958093106775447e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.2958053907944284e+18,1.1635522593991475e+18,btspopmp3,en,0.0,...,,,,,,Y,False,btspopmp3 bt lockdown bwl era the dynamit teas...,100,lockdown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21121,Tue Aug 18 20:16:14 +0000 2020,1.2958167991040123e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,True,break sampp 500 close at record high eras the ...,100,shutdown
21122,Tue Aug 18 20:16:14 +0000 2020,1.2958167996451267e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,,Y,True,45 out 50 us governor forc covid19 infect pati...,100,home
21123,Tue Aug 18 20:16:14 +0000 2020,1.2958167997082173e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,True,cant believ china parti america still strict l...,100,lockdown
21124,Tue Aug 18 20:16:15 +0000 2020,1.2958168013146276e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,True,who still social distanc wear mask limit conta...,100,social distanc


# 2_Adaptation

In [17]:
# Score every tweet against the Adaptation keywords, then keep only exact
# (100) matches.
interim_Ada = data.apply(
    lambda row: fuzzy_logic(row, FINAL_COL_NAME, keywords_Ada, Facets.ADAPTATION),
    axis=1,
)
interim_Ada_final = keep_only_highest(data=interim_Ada, high_value=100, facet=Facets.ADAPTATION)

In [18]:
# Display the tweets whose Adaptation score is exactly 100.
interim_Ada_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Ada,final_keyword_match_Ada
0,Tue Aug 18 19:46:28 +0000 2020,1.295809307771048e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,False,boypussi tight he behav covid natur asian puss...,100,to go
1,Tue Aug 18 19:46:32 +0000 2020,1.2958093241833595e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,False,eat vat homemad hummu clear pictur work from home,100,work from home
2,Tue Aug 18 19:46:37 +0000 2020,1.2958093438295327e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,False,therapist obsess think covid fake tell to go o...,100,to go
3,Tue Aug 18 19:46:37 +0000 2020,1.2958093449074156e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,False,2020 year adapt learn ecologist assist profess...,100,learn
4,Tue Aug 18 19:46:38 +0000 2020,1.295809348082512e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,1.2956844342687703e+18,1915531928.0,missalys,en,0.0,...,,,,,,Y,False,missali airbnb know lot friend go to portland ...,100,work from home
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3157,Tue Aug 18 20:15:59 +0000 2020,1.295816736269312e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,True,the peopl test posit to told to go home right ...,100,to go
3158,Tue Aug 18 20:16:05 +0000 2020,1.2958167591702897e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,True,dont forget financ peopl at home plug 56 numbe...,100,work from home
3159,Tue Aug 18 20:16:06 +0000 2020,1.2958167669003796e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,True,txt unlearn social distanc find peac,100,learn
3160,Tue Aug 18 20:16:08 +0000 2020,1.2958167751715471e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,,Y,True,txt unlearn social distanc find peac,100,learn


# 3_Negative Emotion

In [19]:
# Score every tweet against the Negative Emotion keywords, then keep only
# exact (100) matches.
interim_Ne = data.apply(
    lambda row: fuzzy_logic(row, FINAL_COL_NAME, keywords_Ne, Facets.NEGATIVE_EMOTIONS),
    axis=1,
)
interim_Ne_final = keep_only_highest(data=interim_Ne, high_value=100, facet=Facets.NEGATIVE_EMOTIONS)

In [20]:
# Display the tweets whose Negative Emotion score is exactly 100.
interim_Ne_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Ne,final_keyword_match_Ne
0,Tue Aug 18 19:46:27 +0000 2020,1.295809305120313e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,1.2955693930496369e+18,7.623897950372947e+17,TheRightMelissa,en,0.0,...,,,,,,Y,False,therightmelissa imagin ignor peopl like forget...,100,worri
1,Tue Aug 18 19:46:31 +0000 2020,1.295809319729074e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.295808370843279e+18,9.32391502109692e+17,ajgomesta,en,0.0,...,,,,,,Y,False,ajgomesta oh do houston the 4th largest citi t...,100,end
2,Tue Aug 18 19:46:32 +0000 2020,1.2958093248545219e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,False,gain weight quarantin gain lost gain weight ba...,100,end
3,Tue Aug 18 19:46:33 +0000 2020,1.2958093302442107e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,False,good listen might help center your anxieti do ...,100,anxieti
4,Tue Aug 18 19:46:36 +0000 2020,1.2958093402852923e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,False,thousand peopl attend the hoha water electr mu...,100,end
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5128,Tue Aug 18 20:16:08 +0000 2020,1.2958167748611645e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,True,chines covid19 vaccin contend made stateown ph...,100,end
5129,Tue Aug 18 20:16:09 +0000 2020,1.2958167753435177e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,True,your scare covid im scare biden presid the,100,scare
5130,Tue Aug 18 20:16:10 +0000 2020,1.2958167820334203e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,,Y,True,1 reflect friend the uk the second wave come s...,100,end
5131,Tue Aug 18 20:16:10 +0000 2020,1.2958167823227658e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,True,your scare covid im scare biden presid the,100,scare


# 4_Social Disruption

In [21]:
# Score every tweet against the Social Disruption keywords, then keep only
# exact (100) matches.
interim_Sd = data.apply(
    lambda row: fuzzy_logic(row, FINAL_COL_NAME, keywords_Sd, Facets.SOCIAL_DISRUPTION),
    axis=1,
)
interim_Sd_final = keep_only_highest(data=interim_Sd, high_value=100, facet=Facets.SOCIAL_DISRUPTION)

In [22]:
# Display the tweets whose Social Disruption score is exactly 100.
interim_Sd_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Sd,final_keyword_match_Sd
0,Tue Aug 18 19:46:31 +0000 2020,1.295809319729074e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.295808370843279e+18,9.32391502109692e+17,ajgomesta,en,0.0,...,,,,,,Y,False,ajgomesta oh do houston the 4th largest citi t...,100,larg
1,Tue Aug 18 19:46:31 +0000 2020,1.2958093219896238e+18,"<a href=""https://about.twitter.com/products/tw...",False,,1.295714621614162e+18,2347049341.0,voxdotcom,en,0.0,...,,,,,,Y,False,voxdotcom presid trump took unpreced step resp...,100,travel
2,Tue Aug 18 19:46:40 +0000 2020,1.2958093591259955e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.2958073087153152e+18,2853461537.0,ScottAdamsSays,en,0.0,...,,,,,,Y,False,scottadamssay sweden also larg govern run nurs...,100,larg
3,Tue Aug 18 19:46:42 +0000 2020,1.295809364838609e+18,"<a href=""http://instagram.com"" rel=""nofollow"">...",False,,,,,en,0.0,...,,,,,,Y,False,the last month weve made consum mostli older s...,100,larg
4,Tue Aug 18 19:46:50 +0000 2020,1.2958093999534694e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.2958070791779123e+18,7.145074163820134e+17,EmilyRussellADK,en,0.0,...,,,,,,Y,False,emilyrusselladk seem like cover given the pand...,100,travel
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2560,Tue Aug 18 20:15:57 +0000 2020,1.2958167265048044e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,True,corona the largest scam ever perpetr peopl die...,100,larg
2561,Tue Aug 18 20:15:58 +0000 2020,1.295816730430718e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,True,guy today wrote upcatet examin let tell one th...,100,crowd
2562,Tue Aug 18 20:16:03 +0000 2020,1.2958167502026424e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,True,cnntravel ground zero the coronaviru pandem wu...,100,travel
2563,Tue Aug 18 20:16:04 +0000 2020,1.2958167548122153e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,,Y,True,guy today wrote upcatet examin let tell one th...,100,crowd


# 5_Purpose

In [23]:
# Score every tweet against the Purpose keywords, then keep only exact (100)
# matches.
interim_Purp = data.apply(
    lambda row: fuzzy_logic(row, FINAL_COL_NAME, keywords_Purp, Facets.PURPOSE),
    axis=1,
)
interim_Purp_final = keep_only_highest(data=interim_Purp, high_value=100, facet=Facets.PURPOSE)

In [24]:
# Display the tweets whose Purpose score is exactly 100.
interim_Purp_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Purp,final_keyword_match_Purp
0,Tue Aug 18 19:46:32 +0000 2020,1.2958093239904827e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,False,cloth face mask give 10 protect no mask at giv...,100,protect
1,Tue Aug 18 19:46:34 +0000 2020,1.2958093345181286e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.2958031205100012e+18,20098015.0,davidschneider,en,0.0,...,,,,,,Y,False,davidschneid tori blood hand alreadi know half...,100,protect
2,Tue Aug 18 19:46:35 +0000 2020,1.295809336279806e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,False,save live the right thing the measur even lack...,100,save
3,Tue Aug 18 19:47:00 +0000 2020,1.295809442206888e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,False,wed like to thank oper team hard work the past...,100,protect
4,Tue Aug 18 19:47:05 +0000 2020,1.295809462750642e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,False,im proud to wear mask protect peopl help preve...,100,protect
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2048,Tue Aug 18 20:16:07 +0000 2020,1.295816768502608e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,True,presid deliv arizona the pandem the paycheck p...,100,save
2049,Tue Aug 18 20:16:09 +0000 2020,1.295816775477715e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,,Y,True,compar the ubiquit flatten the curv graph the ...,100,flatten the curv
2050,Tue Aug 18 20:16:11 +0000 2020,1.2958167854726963e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,True,presid deliv arizona the pandem the paycheck p...,100,save
2051,Tue Aug 18 20:16:12 +0000 2020,1.2958167894657393e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,,,,Y,True,the coronaviru task forc remain commit to prot...,100,protect


# 6_Positive Emotion

In [25]:
# Score every tweet against the Positive Emotion keywords, then keep only
# exact (100) matches.
interim_Pe = data.apply(
    lambda row: fuzzy_logic(row, FINAL_COL_NAME, keywords_Pe, Facets.POSITIVE_EMOTION),
    axis=1,
)
interim_Pe_final = keep_only_highest(data=interim_Pe, high_value=100, facet=Facets.POSITIVE_EMOTION)

In [26]:
# Display the tweets whose Positive Emotion score is exactly 100.
interim_Pe_final

Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,quote_count,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Pe,final_keyword_match_Pe
0,Tue Aug 18 19:46:32 +0000 2020,1.295809323398918e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.2958093226019676e+18,175263641.0,petersankoff,en,0.0,...,,,,,,Y,False,precis hope start the project hard to get foot...,100,hope
1,Tue Aug 18 19:46:32 +0000 2020,1.295809324812505e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,1.29567571866436e+18,7.122611627637555e+17,AddressingLife,en,0.0,...,,,,,,Y,False,addressinglif got the guidelin govt sent howev...,100,safe
2,Tue Aug 18 19:46:33 +0000 2020,1.295809326846808e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,389683366.0,networkforphl,en,0.0,...,,,,,,Y,False,networkforphl new report offer polici recommen...,100,safe
3,Tue Aug 18 19:46:36 +0000 2020,1.2958093408012452e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,False,one hand the lift lockdown meant danger peopl ...,100,safe
4,Tue Aug 18 19:46:42 +0000 2020,1.295809366378127e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,29447794.0,uvmvermont,en,0.0,...,,,,,,Y,False,uvmvermont the citi burlington work togeth pla...,100,safe
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2147,Tue Aug 18 20:15:52 +0000 2020,1.2958167066615726e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,,Y,True,postponejeeandneet everyth plan accord to the ...,100,safe
2148,Tue Aug 18 20:16:02 +0000 2020,1.2958167467886305e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,True,the term brand safeti taken liter mean the pandem,100,safe
2149,Tue Aug 18 20:16:05 +0000 2020,1.295816760801796e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,Y,True,hungarian univers must provid the follow safet...,100,safe
2150,Tue Aug 18 20:16:06 +0000 2020,1.2958167636623073e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,,Y,True,croatia safe binat coupl separ eu coronaviru e...,100,safe


### All Proportions

In [27]:
# Share of scored tweets that hit a keyword exactly (score 100), per facet.
for facet_label, matched_frame, scored_frame in (
    ("IMPLEMENTATION", interim_Imp_final, interim_Imp),
    ("ADAPTATION", interim_Ada_final, interim_Ada),
    ("NEGATIVE EMOTIONS", interim_Ne_final, interim_Ne),
    ("SOCIAL DISRUPTION", interim_Sd_final, interim_Sd),
    ("PURPOSE", interim_Purp_final, interim_Purp),
    ("POSITIVE EMOTION", interim_Pe_final, interim_Pe),
):
    print("For {}:".format(facet_label), proportion(matched_frame, scored_frame))

For IMPLEMENTATION: 0.2000416635103401
For ADAPTATION: 0.029940913567154004
For NEGATIVE EMOTIONS: 0.048604272403605785
For SOCIAL DISRUPTION: 0.02428793273236876
For PURPOSE: 0.019439815165517765
For POSITIVE EMOTION: 0.020377244148170593


In [28]:
def merge_interim(data, interim_data_final_):
    """Left-join ``interim_data_final_`` onto ``data`` by tweet ``id``.

    Every row of ``data`` is kept; the result is sorted by ``id``. Columns
    present in both frames (other than ``id``) receive pandas' default
    ``_x`` / ``_y`` suffixes.
    """
    return data.merge(
        right=interim_data_final_,
        on=['id'],
        how='left',
        sort=True,
    )

In [34]:
# Attach each facet's score / matched-keyword columns to the full tweet table.
# After every left join only the columns that were already on the left side
# plus that facet's two result columns are kept. This is the selection the
# former hard-coded positional index lists performed, but it no longer
# depends on the input having exactly 81 columns.
facet_results = [
    (Facets.IMPLEMENTATION, interim_Imp_final),
    (Facets.ADAPTATION, interim_Ada_final),
    (Facets.NEGATIVE_EMOTIONS, interim_Ne_final),
    (Facets.SOCIAL_DISRUPTION, interim_Sd_final),
    (Facets.PURPOSE, interim_Purp_final),
    (Facets.POSITIVE_EMOTION, interim_Pe_final),
]

merge = data
for facet, interim_final in facet_results:
    facet_cols = [
        'final_score_{}'.format(facet.value),
        'final_keyword_match_{}'.format(facet.value),
    ]
    n_left_cols = merge.shape[1]
    merged = merge_interim(merge, interim_final)
    # pd.merge lists the left frame's columns first (suffixed with _x where
    # they clash with the right frame), so the first n_left_cols columns are
    # exactly the table built so far.
    merge = merged[list(merged.columns[:n_left_cols]) + facet_cols]

merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 105618 entries, 0 to 105617
Data columns (total 93 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   created_at_x                 105618 non-null  object 
 1   id                           105618 non-null  object 
 2   source_x                     105557 non-null  object 
 3   is_quote_tweet_x             105618 non-null  object 
 4   quoted_tweet_id_x            26939 non-null   object 
 5   in_reply_to_status_id_str_x  13393 non-null   object 
 6   in_reply_to_user_id_str_x    14366 non-null   object 
 7   in_reply_to_screen_name_x    14365 non-null   object 
 8   lang_x                       105618 non-null  object 
 9   quote_count_x                105618 non-null  object 
 10  reply_count_x                105618 non-null  object 
 11  retweet_count_x              105618 non-null  float64
 12  like_count_x                 105618 non-null  float64
 13 

In [40]:
# Display the full tweet table with all six facets' score / keyword columns.
merge

Unnamed: 0,created_at_x,id,source_x,is_quote_tweet_x,quoted_tweet_id_x,in_reply_to_status_id_str_x,in_reply_to_user_id_str_x,in_reply_to_screen_name_x,lang_x,quote_count_x,...,final_score_Ada,final_keyword_match_Ada,final_score_Ne,final_keyword_match_Ne,final_score_Sd,final_keyword_match_Sd,final_score_Purp,final_keyword_match_Purp,final_score_Pe,final_keyword_match_Pe
0,Tue Aug 18 19:30:11 +0000 2020,1.2958052093864223e+18,"<a href=""http://twitter.com/download/android"" ...",False,,,,,en,0.0,...,,,,,,,,,,
1,Tue Aug 18 19:30:11 +0000 2020,1.2958052095459328e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.2955396626316452e+18,,,,en,0.0,...,,,,,,,,,,
2,Tue Aug 18 19:30:11 +0000 2020,1.2958052096341197e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.2956733119265505e+18,,,,en,0.0,...,,,,,,,,,,
3,Tue Aug 18 19:30:11 +0000 2020,1.2958052097221837e+18,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,,en,0.0,...,,,100.0,lone,,,,,,
4,Tue Aug 18 19:30:11 +0000 2020,1.2958052099235308e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2955719577741967e+18,,,,en,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105613,Tue Aug 18 20:22:17 +0000 2020,1.2958183206677094e+18,"<a href=""http://twitter.com/download/android"" ...",False,,1.2958170296231854e+18,1.2688670061844644e+18,lindaba58979338,en,0.0,...,,,,,,,,,,
105614,Tue Aug 18 20:22:17 +0000 2020,1.2958183206719078e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2944862207483167e+18,,,,en,0.0,...,,,,,,,,,,
105615,Tue Aug 18 20:22:17 +0000 2020,1.2958183208269207e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2953457100327526e+18,,,,en,0.0,...,,,,,,,,,,
105616,Tue Aug 18 20:22:17 +0000 2020,1.295818321468838e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,en,0.0,...,,,,,,,,,,
