In [1]:
#Make imports
# !pip install fuzzywuzzy
# !pip install python-Levenshtein
# !pip install sklearn
# !pip install pandas
# !pip install numpy

import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import string
import sklearn
import numpy as np
import time
from enum import Enum

!pip install multiprocess
import multiprocess
import multiprocessing as mp
from multiprocessing import Pool, Process


import nltk
from nltk import pos_tag, pos_tag_sents
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
stop_words = set(stopwords.words('english')) - set(['at', 'do', 'your', 'from', 'to', 'out', 'no', 'the'])

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


[nltk_data] Downloading package stopwords to
[nltk_data]     /home/manikya_varshney/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/manikya_varshney/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [2]:
start = time.time()
path = '/home/manikya_varshney/Documents/Python/Yale/Latest/output_data/Tue Aug 18/combined_Tue Aug 18.csv'
data = pd.read_csv(path, index_col=None, header=0, engine='python' )
end = time.time()
print("Read csv with pandas: ",(end-start),"sec")

Read csv with pandas:  7.071659803390503 sec


In [3]:
keywords_Imp = ['stay at home' , 'do your part', 'Responsible', 
            'home', 'house', 'cancel', 'shutdown', 'postpone',
            'school closure', 'Closure', 'business closure',
            'suspension', 'quarantine', 'lockdown', 'social distance', 
            'social distancing', 'self quarantine', 'isolat', '6-feet',
            'distance', '#clubquarantine', '#quarantinelife', '#quarantineacitivites']

keywords_Ada = ['school from home' , 'learn', 'remote', 'school food service', 
            'online shopping', 'online purchase', 'online church', 'delivery',
            'drive thru', 'to go', 'take out', 'Tiktok', 'Netflix', 'telework', 
            'zoom', 'telehealth', 'telemedicine', 'work from home', 'wfh',
            'working at home', 'working remotely', 'online meeting']

keywords_Ne = ['bored' , 'lonely', 'stress', 
            'anxiety', 'scared', 'worry', 'end', 'cabin fever',
            '#sideeffectsofquarantinelife', 'tissue paper', 'toilet paper']

keywords_Sd = ['social functions' , 'gathering', 'empty streets', 
            'interaction', 'large', 'no cars', 'non-essential',
            'travel', 'unnecessary', 'crowd']

keywords_Purp = ['Flatten the curve' , 'Slow the spread', 'slow transmission', 
            'protect', 'save', '#stayhomesavelives']

keywords_Pe = ['silver lining' , 'optimistic', 'hope', 
            'bright side', 'Safe', '#togetherapart']

In [4]:
# Constants
#FINAL_COL_NAME = "FINAL_TEXT"
num_cores = mp.cpu_count()
FINAL_COL_NAME = "FINAL_TEXT"

In [5]:
def keywords_cleaning(keywords_list):
    
    #Convert to lower
    for i in range(len(keywords_list)): 
        keywords_list[i] = keywords_list[i].lower()
    
    #Remove punctuations
    for i in range(len(keywords_list)):
        keywords_list[i] = keywords_list[i].translate(str.maketrans('','',string.punctuation))
    
    #More cleaning
    for i in range(len(keywords_list)):
        keywords_list[i] = keywords_list[i].replace('/[^a-zA-Z0-9 ]/g', '').replace('\n',' ').strip('“').strip('“').strip('’').lstrip(' ').rstrip(' ')

    #Remove stop words
    def remove_stopwords(data):
        output_array=[]
        for sentence in data:
            temp_list=[]
            for word in sentence.split():
                if word not in stop_words:
                    temp_list.append(word)
            output_array.append(' '.join(temp_list))
        return output_array

    keywords_list=remove_stopwords(keywords_list)

    #Stemming
    ps = PorterStemmer()
    keywords_list_stem = [[ps.stem(word) for word in sentence.split(" ")] for sentence in keywords_list]
    keywords_list_final = [" ".join(sentence) for sentence in keywords_list_stem]

    return keywords_list_final


In [6]:
# Enum for facets
class Facets(Enum):
    IMPLEMENTATION = "Imp"
    ADAPTATION = "Ada"
    NEGATIVE_EMOTIONS = "Ne"
    SOCIAL_DISRUPTION = "Sd"
    PURPOSE = "Purp"
    POSITIVE_EMOTION = "Pe"

In [7]:
def fuzzy_logic(row, FINAL_COL_NAME, keywords, facet):
    keyword_match, score = process.extractOne(row[FINAL_COL_NAME], keywords, scorer = fuzz.partial_ratio)
    row['final_score_{}'.format(facet.value)] = score
    row['final_keyword_match_{}'.format(facet.value)] = keyword_match
    return row

In [8]:
def keep_only_highest(data, high_value, facet):
    data['final_score_{}'.format(facet.value)] = data['final_score_{}'.format(facet.value)].astype(int)    
    data = data[data['final_score_{}'.format(facet.value)] == 100].reset_index(drop=True)    
    return data

In [9]:
def proportion(interim_data_final, interim_data):
    numerator = interim_data_final.shape[0]
    denominator = interim_data.shape[0]
    prop_val = (numerator/denominator)
    return prop_val

In [10]:
def split_dataframe(df, nums = 4): 
    chunks = list()
    num_chunks = nums
    chunk_size = len(df) // nums
    for i in range(num_chunks):
        chunks.append(df[i*chunk_size:(i+1)*chunk_size])
    return chunks

In [11]:
def multiprocessing_(data_, keywords_, facet_):
    print(data_.shape)
    start = time.time()
    pool = Pool(num_cores)
    df = split_dataframe(data_, num_cores)
    data1 = pool.starmap(fuzzy_logic, [(x, FINAL_COL_NAME, keywords_, facet_) for x in df])
    pool.close()
    pool.join()
    end = time.time()
    print("time = ", end - start)
    return data1

### Keywords Cleaning

In [12]:
keywords_Imp = keywords_cleaning(keywords_Imp)
keywords_Ada = keywords_cleaning(keywords_Ada)
keywords_Ne = keywords_cleaning(keywords_Ne)
keywords_Sd = keywords_cleaning(keywords_Sd)
keywords_Purp = keywords_cleaning(keywords_Purp)
keywords_Pe = keywords_cleaning(keywords_Pe)

### Drop Rows with Empty Tweets

In [13]:
nan_value = float("NaN")
data[FINAL_COL_NAME].replace("", nan_value, inplace=True)
data.dropna(subset = [FINAL_COL_NAME], inplace=True)

# 1_Implementation

In [14]:
#interim_Imp = multiprocessing_(data, keywords_Imp, Facets.IMPLEMENTATION)
interim_Imp = data.apply(fuzzy_logic, axis = 1, args = (FINAL_COL_NAME, keywords_Imp, Facets.IMPLEMENTATION))
interim_Imp_final = keep_only_highest(interim_Imp, 100, Facets.IMPLEMENTATION)

In [15]:
interim_Imp_final

Unnamed: 0.1,Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Imp,final_keyword_match_Imp
0,44,Tue Aug 18 19:46:27 +0000 2020,1.2958093031364116e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.295726622818087e+18,,,,en,...,,,,Just heard that our article on our combined AH...,Just heard that our article on our combined AH...,Y,False,Just heard that our article on our combined AH...,100,respons
1,71,Tue Aug 18 19:46:28 +0000 2020,1.2958093054432625e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.2945049116531016e+18,,,,en,...,,,,The first 3 words are the things you’ll get af...,,Y,False,The first 3 words are the things you’ll get af...,100,quarantin
2,188,Tue Aug 18 19:46:30 +0000 2020,1.2958093172164813e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.2945049116531016e+18,,,,en,...,,,,The first 3 words are the things you’ll get af...,,Y,False,The first 3 words are the things you’ll get af...,100,quarantin
3,319,Tue Aug 18 19:46:33 +0000 2020,1.295809329115865e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.2957721768057774e+18,,,,en,...,,,,I'm VERY supportive of the homeless community....,I'm VERY supportive of the homeless community....,Y,False,I'm VERY supportive of the homeless community....,100,home
4,369,Tue Aug 18 19:46:34 +0000 2020,1.2958093345767383e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.295769834995712e+18,,,,en,...,,,,I'd take a full lockdown for 3 weeks with the ...,I'd take a full lockdown for 3 weeks with the ...,Y,False,I'd take a full lockdown for 3 weeks with the ...,100,lockdown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2416,11141,Tue Aug 18 20:16:00 +0000 2020,1.295816740924846e+18,"<a href=""https://about.twitter.com/products/tw...",True,1.2955036779654472e+18,,,,en,...,,,,"NEW: Mississippi has quarantined 2,035 student...","NEW: Mississippi has quarantined 2,035 student...",Y,True,"NEW: Mississippi has quarantined 2,035 student...",100,quarantin
2417,11143,Tue Aug 18 20:16:00 +0000 2020,1.295816740925055e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.295730601773994e+18,,,,en,...,,,,"""Nine cases in a day does not compare to the U...","""Nine cases in a day does not compare to the U...",Y,True,"""Nine cases in a day does not compare to the U...",100,hous
2418,11158,Tue Aug 18 20:16:01 +0000 2020,1.295816742548144e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2954363787972485e+18,,,,en,...,,,,LIVE: Crowds in Buenos Aires rally against qua...,,Y,True,LIVE: Crowds in Buenos Aires rally against qua...,100,quarantin
2419,11201,Tue Aug 18 20:16:03 +0000 2020,1.2958167506348605e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.2956416406742423e+18,,,,en,...,,,,⚠️Help avoid a local lockdown ⚠️\n\nThere has ...,⚠️Help avoid a local lockdown ⚠️\n\nThere has ...,Y,True,⚠️Help avoid a local lockdown ⚠️\n\nThere has ...,100,lockdown


# 2_Adaptation

In [16]:
interim_Ada = data.apply(fuzzy_logic, axis = 1, args = (FINAL_COL_NAME, keywords_Ada, Facets.ADAPTATION))
interim_Ada_final = keep_only_highest(interim_Ada, 100, Facets.ADAPTATION)

In [17]:
interim_Ada_final

Unnamed: 0.1,Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Ada,final_keyword_match_Ada
0,1335,Tue Aug 18 19:46:56 +0000 2020,1.2958094262558351e+18,"<a href=""http://twitter.com/#!/download/ipad"" ...",True,1.2958085821606543e+18,,,,en,...,,,,"At at time of mass work from home, Amazon is b...","At at time of mass work from home, Amazon is b...",Y,False,"At at time of mass work from home, Amazon is b...",100,work from home
1,2283,Tue Aug 18 19:47:17 +0000 2020,1.295809514365649e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.2958001649223352e+18,,,,en,...,,,,FL COVID-19 Update (1/5): Antibody testing fro...,FL COVID-19 Update (1/5): Antibody testing fro...,Y,False,FL COVID-19 Update (1/5): Antibody testing fro...,100,drive thru
2,2796,Tue Aug 18 19:47:30 +0000 2020,1.2958095668239155e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.295540815360598e+18,,,,en,...,,,,Glad this COVID shit ain’t happen when I was i...,,Y,False,Glad this COVID shit ain’t happen when I was i...,100,zoom
3,3033,Tue Aug 18 19:47:35 +0000 2020,1.2958095892088996e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2958042976540017e+18,,,,en,...,,,,"""For those DC folks that have jobs that lend t...","""For those DC folks that have jobs that lend t...",Y,False,"""For those DC folks that have jobs that lend t...",100,remot
4,3137,Tue Aug 18 19:47:37 +0000 2020,1.2958095982056284e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.2958001649223352e+18,,,,en,...,,,,FL COVID-19 Update (1/5): Antibody testing fro...,FL COVID-19 Update (1/5): Antibody testing fro...,Y,False,FL COVID-19 Update (1/5): Antibody testing fro...,100,drive thru
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,10801,Tue Aug 18 20:15:46 +0000 2020,1.2958166817391247e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2958001649223352e+18,,,,en,...,,,,FL COVID-19 Update (1/5): Antibody testing fro...,FL COVID-19 Update (1/5): Antibody testing fro...,Y,True,FL COVID-19 Update (1/5): Antibody testing fro...,100,drive thru
498,11047,Tue Aug 18 20:15:53 +0000 2020,1.2958167093460746e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.2958001649223352e+18,,,,en,...,,,,FL COVID-19 Update (1/5): Antibody testing fro...,FL COVID-19 Update (1/5): Antibody testing fro...,Y,True,FL COVID-19 Update (1/5): Antibody testing fro...,100,drive thru
499,11227,Tue Aug 18 20:16:04 +0000 2020,1.295816756036952e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2958001649223352e+18,,,,en,...,,,,FL COVID-19 Update (1/5): Antibody testing fro...,FL COVID-19 Update (1/5): Antibody testing fro...,Y,True,FL COVID-19 Update (1/5): Antibody testing fro...,100,drive thru
500,11422,Tue Aug 18 20:16:12 +0000 2020,1.2958167881484902e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.2958001649223352e+18,,,,en,...,,,,FL COVID-19 Update (1/5): Antibody testing fro...,FL COVID-19 Update (1/5): Antibody testing fro...,Y,True,FL COVID-19 Update (1/5): Antibody testing fro...,100,drive thru


# 3_Negative Emotion

In [18]:
interim_Ne = data.apply(fuzzy_logic, axis = 1, args = (FINAL_COL_NAME, keywords_Ne, Facets.NEGATIVE_EMOTIONS))
interim_Ne_final = keep_only_highest(interim_Ne, 100, Facets.NEGATIVE_EMOTIONS)

In [19]:
interim_Ne_final

Unnamed: 0.1,Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Ne,final_keyword_match_Ne
0,381,Tue Aug 18 19:46:35 +0000 2020,1.29580933588113e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.2957651042946785e+18,,,,en,...,,,,Why JEE and NEET aspirants worried about givin...,Why JEE and NEET aspirants worried about givin...,Y,False,Why JEE and NEET aspirants worried about givin...,100,worri
1,592,Tue Aug 18 19:46:39 +0000 2020,1.2958093537952113e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2953697851533353e+18,,,,en,...,,,,I’m scared to get bullied but I’m more scared ...,I’m scared to get bullied but I’m more scared ...,Y,False,I’m scared to get bullied but I’m more scared ...,100,scare
2,819,Tue Aug 18 19:46:44 +0000 2020,1.295809376322814e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.29555261102678e+18,,,,en,...,,,,If we want to end the chaos and division—and k...,If we want to end the chaos and division—and k...,Y,False,If we want to end the chaos and division—and k...,100,end
3,904,Tue Aug 18 19:46:47 +0000 2020,1.2958093850970808e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.295739747344642e+18,,,,en,...,,,,@DrRPNishank @narendramodi @AmitShah @PMOIndia...,@DrRPNishank @narendramodi @AmitShah @PMOIndia...,Y,False,@DrRPNishank @narendramodi @AmitShah @PMOIndia...,100,end
4,977,Tue Aug 18 19:46:48 +0000 2020,1.2958093915900273e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.2957920640730112e+18,,,,en,...,,,,Germany to extend coronavirus furlough to 24 m...,,Y,False,Germany to extend coronavirus furlough to 24 m...,100,end
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1021,10526,Tue Aug 18 20:15:35 +0000 2020,1.295816633953538e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2957269547511685e+18,,,,en,...,,,,What a run. @patriotact has come to an end. I ...,What a run. @patriotact has come to an end. I ...,Y,True,What a run. @patriotact has come to an end. I ...,100,end
1022,10598,Tue Aug 18 20:15:38 +0000 2020,1.2958166461211853e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2957755666716099e+18,,,,en,...,,,,Germany to extend coronavirus furlough to 24 m...,,Y,True,Germany to extend coronavirus furlough to 24 m...,100,end
1023,10657,Tue Aug 18 20:15:40 +0000 2020,1.2958166548285768e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2953455582704883e+18,,,,en,...,,,,VIDEO: 🇨🇳 Crowds packed out a water park over ...,VIDEO: 🇨🇳 Crowds packed out a water park over ...,Y,True,VIDEO: 🇨🇳 Crowds packed out a water park over ...,100,end
1024,10934,Tue Aug 18 20:15:51 +0000 2020,1.2958167038598144e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.2947024247043932e+18,,,,en,...,,,,MY BEST Friend James and I Have not seen each...,MY BEST Friend James and I Have not seen each...,Y,True,MY BEST Friend James and I Have not seen each...,100,end


# 4_Social Disruption

In [20]:
interim_Sd = data.apply(fuzzy_logic, axis = 1, args = (FINAL_COL_NAME, keywords_Sd, Facets.SOCIAL_DISRUPTION))
interim_Sd_final = keep_only_highest(interim_Sd, 100, Facets.SOCIAL_DISRUPTION)

In [21]:
interim_Sd_final

Unnamed: 0.1,Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Sd,final_keyword_match_Sd
0,24,Tue Aug 18 19:46:27 +0000 2020,1.2958093014795305e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.295427424130343e+18,,,,en,...,,,,NEW: Health officials in Northern BC have link...,NEW: Health officials in Northern BC have link...,Y,False,NEW: Health officials in Northern BC have link...,100,gather
1,471,Tue Aug 18 19:46:36 +0000 2020,1.2958093429696512e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.295808825384198e+18,,,,en,...,,,,The crowds walking into town centre in Kíllarn...,The crowds walking into town centre in Kíllarn...,Y,False,The crowds walking into town centre in Kíllarn...,100,crowd
2,571,Tue Aug 18 19:46:39 +0000 2020,1.2958093520125706e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.2957646701717996e+18,,,,en,...,,,,Thousands of Wuhan revelers gathered in a wate...,Thousands of Wuhan revelers gathered in a wate...,Y,False,Thousands of Wuhan revelers gathered in a wate...,100,gather
3,587,Tue Aug 18 19:46:39 +0000 2020,1.2958093535225815e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.295780262211715e+18,,1.067529e+18,HPolymenis,en,...,,,,US universities are reporting hundreds of new ...,US universities are reporting hundreds of new ...,Y,False,US universities are reporting hundreds of new ...,100,gather
4,2937,Tue Aug 18 19:47:33 +0000 2020,1.2958095791258214e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.295635292838404e+18,,,,en,...,,,,.@CNNTravel: It was ground zero in the coronav...,.@CNNTravel: It was ground zero in the coronav...,Y,False,.@CNNTravel: It was ground zero in the coronav...,100,travel
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,9861,Tue Aug 18 20:15:07 +0000 2020,1.2958165158250824e+18,"<a href=""https://www.botcollective.com/bots/ju...",True,1.295635292838404e+18,,,,en,...,,,,.@CNNTravel: It was ground zero in the coronav...,.@CNNTravel: It was ground zero in the coronav...,Y,True,.@CNNTravel: It was ground zero in the coronav...,100,travel
535,10657,Tue Aug 18 20:15:40 +0000 2020,1.2958166548285768e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2953455582704883e+18,,,,en,...,,,,VIDEO: 🇨🇳 Crowds packed out a water park over ...,VIDEO: 🇨🇳 Crowds packed out a water park over ...,Y,True,VIDEO: 🇨🇳 Crowds packed out a water park over ...,100,crowd
536,11100,Tue Aug 18 20:15:59 +0000 2020,1.2958167334508012e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.295379649086853e+18,,,,en,...,,,,Bravo! 🥳👏👍\n\nLarge protests in Madrid against...,Bravo! 🥳👏👍\n\nLarge protests in Madrid against...,Y,True,Bravo! 🥳👏👍\n\nLarge protests in Madrid against...,100,larg
537,11158,Tue Aug 18 20:16:01 +0000 2020,1.295816742548144e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2954363787972485e+18,,,,en,...,,,,LIVE: Crowds in Buenos Aires rally against qua...,,Y,True,LIVE: Crowds in Buenos Aires rally against qua...,100,crowd


# 5_Purpose

In [22]:
interim_Purp = data.apply(fuzzy_logic, axis = 1, args = (FINAL_COL_NAME, keywords_Purp, Facets.PURPOSE))
interim_Purp_final = keep_only_highest(interim_Purp, 100, Facets.PURPOSE)

In [23]:
interim_Purp_final

Unnamed: 0.1,Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Purp,final_keyword_match_Purp
0,1897,Tue Aug 18 19:47:09 +0000 2020,1.295809478537904e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2826761567648072e+18,,,,en,...,,,,"If you’ve had COVID-19, your plasma can litera...",,Y,False,"If you’ve had COVID-19, your plasma can litera...",100,save
1,3026,Tue Aug 18 19:47:35 +0000 2020,1.2958095886132183e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.2957870734509343e+18,,,,en,...,,,,Cuomo writing a book about protecting the elde...,,Y,False,Cuomo writing a book about protecting the elde...,100,protect
2,3634,Tue Aug 18 19:47:48 +0000 2020,1.2958096428666388e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.2957582221045924e+18,,,,en,...,,,,Compare the ubiquitous “flatten the curve” gra...,Compare the ubiquitous “flatten the curve” gra...,Y,False,Compare the ubiquitous “flatten the curve” gra...,100,flatten the curv
3,4596,Tue Aug 18 19:48:09 +0000 2020,1.2958097299613614e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.295442776172241e+18,,,,en,...,,,,"Secretary Esper and President Trump, it's a pr...","Secretary Esper and President Trump, it's a pr...",Y,False,"Secretary Esper and President Trump, it's a pr...",100,protect
4,6312,Tue Aug 18 19:48:47 +0000 2020,1.2958098911861596e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.2689929275330724e+18,,,,en,...,,,,Masks on their own will not protect you from #...,Masks on their own will not protect you from #...,Y,False,Masks on their own will not protect you from #...,100,protect
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,7317,Tue Aug 18 20:13:21 +0000 2020,1.2958160744373903e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.295677038301524e+18,,,,en,...,,,,....My Administration and I built the greatest...,....My Administration and I built the greatest...,Y,True,....My Administration and I built the greatest...,100,save
260,7750,Tue Aug 18 20:13:39 +0000 2020,1.2958161491590226e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.2957107906834637e+18,,,,en,...,,,,.@MattHancock today announced the establishmen...,.@MattHancock today announced the establishmen...,Y,True,.@MattHancock today announced the establishmen...,100,protect
261,8664,Tue Aug 18 20:14:19 +0000 2020,1.2958163146620723e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2954030841773425e+18,,,,en,...,,,,We ask #G20 Leaders to guarantee the social pr...,We ask #G20 Leaders to guarantee the social pr...,Y,True,We ask #G20 Leaders to guarantee the social pr...,100,protect
262,9273,Tue Aug 18 20:14:44 +0000 2020,1.2958164218056417e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2957956168456271e+18,,,,en,...,,,,✔️ Wear a mask. 😷\n✔️ Show your Shocker Pride...,,Y,True,✔️ Wear a mask. 😷\n✔️ Show your Shocker Pride...,100,protect


# 6_Positive Emotion

In [24]:
interim_Pe = data.apply(fuzzy_logic, axis = 1, args = (FINAL_COL_NAME, keywords_Pe, Facets.POSITIVE_EMOTION))
interim_Pe_final = keep_only_highest(interim_Pe, 100, Facets.POSITIVE_EMOTION)

In [25]:
interim_Pe_final

Unnamed: 0.1,Unnamed: 0,created_at,id,source,is_quote_tweet,quoted_tweet_id,in_reply_to_status_id_str,in_reply_to_user_id_str,in_reply_to_screen_name,lang,...,QT_place_full_name,QT_place_country_code,QT_coordinates,QT_text,QT_full_text,TEST_FLAG,RT,FINAL_TEXT,final_score_Pe,final_keyword_match_Pe
0,3991,Tue Aug 18 19:47:56 +0000 2020,1.295809676693647e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.295759764744528e+18,,,,en,...,,,,We’re rolling out new features in #GoogleMeet ...,We’re rolling out new features in #GoogleMeet ...,Y,False,We’re rolling out new features in #GoogleMeet ...,100,safe
1,4049,Tue Aug 18 19:47:57 +0000 2020,1.2958096821713633e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.2956617198560663e+18,,,,en,...,,,,"Hello, here is your mouse for today ! Have a g...",,Y,False,"Hello, here is your mouse for today ! Have a g...",100,safe
2,4596,Tue Aug 18 19:48:09 +0000 2020,1.2958097299613614e+18,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,1.295442776172241e+18,,,,en,...,,,,"Secretary Esper and President Trump, it's a pr...","Secretary Esper and President Trump, it's a pr...",Y,False,"Secretary Esper and President Trump, it's a pr...",100,safe
3,6072,Tue Aug 18 19:48:42 +0000 2020,1.2958098693087232e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.295760786191782e+18,,,,en,...,,,,We Hope that you have a Great first day of Sch...,,Y,False,We Hope that you have a Great first day of Sch...,100,hope
4,8507,Tue Aug 18 19:49:50 +0000 2020,1.2958101563458191e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2957329668622623e+18,,,,en,...,"Mattituck, NY",US,,@valmont316 @gatewaypundit Cuomo was left alon...,@valmont316 @gatewaypundit Cuomo was left alon...,Y,False,@valmont316 @gatewaypundit Cuomo was left alon...,100,safe
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
225,9119,Tue Aug 18 20:14:37 +0000 2020,1.295816393506599e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2957348553224724e+18,,,,en,...,,,,Did people forget Biden’s team was encouraging...,Did people forget Biden’s team was encouraging...,Y,True,Did people forget Biden’s team was encouraging...,100,safe
226,9941,Tue Aug 18 20:15:10 +0000 2020,1.2958165280220037e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.2955152831136686e+18,,,,en,...,,,,@pattibacchus so rather than lowering class si...,,Y,True,@pattibacchus so rather than lowering class si...,100,safe
227,10043,Tue Aug 18 20:15:14 +0000 2020,1.2958165450930012e+18,"<a href=""http://twitter.com/download/android"" ...",True,1.2956052291827835e+18,,,,en,...,,,,There is now clear data on Covid-19 and childr...,,Y,True,There is now clear data on Covid-19 and childr...,100,safe
228,10482,Tue Aug 18 20:15:33 +0000 2020,1.2958166274649293e+18,"<a href=""http://twitter.com/download/iphone"" r...",True,1.295744657654272e+18,,,,en,...,,,,Where the Christian council is tho? I hope the...,,Y,True,Where the Christian council is tho? I hope the...,100,hope


### All Proportions

In [26]:
print("For IMPLEMENTATION:", proportion(interim_Imp_final, interim_Imp))
print("For ADAPTATION:", proportion(interim_Ada_final, interim_Ada))
print("For NEGATIVE EMOTIONS:", proportion(interim_Ne_final, interim_Ne))
print("For SOCIAL DISRUPTION:", proportion(interim_Sd_final, interim_Sd))
print("For PURPOSE:", proportion(interim_Purp_final, interim_Purp))
print("For POSITIVE EMOTION:", proportion(interim_Pe_final, interim_Pe))

For IMPLEMENTATION: 0.08952078094956367
For ADAPTATION: 0.018562342848691023
For NEGATIVE EMOTIONS: 0.03793817482620914
For SOCIAL DISRUPTION: 0.019930483656263865
For PURPOSE: 0.009761869545925159
For POSITIVE EMOTION: 0.008504659074101464


In [27]:
def merge_interim(data, interim_data_final_):
    merge = pd.merge(data, interim_data_final_, how = 'left', on = ['id'], left_index=False, right_index=False, sort=True)
    return merge

In [28]:
merge = merge_interim(data, interim_Imp_final)
cols = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 157, 158]
merge = merge[merge.columns.values[cols]]
merge = merge_interim(merge, interim_Ada_final)
cols = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78,79, 80, 159, 160]
merge = merge[merge.columns.values[cols]]
merge = merge_interim(merge, interim_Ne_final)
cols = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 161, 162]
merge = merge[merge.columns.values[cols]]
merge = merge_interim(merge, interim_Sd_final)
cols = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,83, 84, 163, 164]
merge = merge[merge.columns.values[cols]]
merge = merge_interim(merge, interim_Purp_final)
cols = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 165, 166]
merge = merge[merge.columns.values[cols]]
merge = merge_interim(merge, interim_Pe_final)
cols = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 167, 168]
merge = merge[merge.columns.values[cols]]
merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27044 entries, 0 to 27043
Columns: 115 entries, Unnamed: 0_x to QT_verified
dtypes: float64(17), object(98)
memory usage: 23.9+ MB
