# Project to predict the popularity of Political leaders and detect hate speech and tribalism from Facebook and Twitter comments

## Imports

In [1]:
from webdriver_manager.chrome import ChromeDriverManager

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import ElementClickInterceptedException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from bs4 import BeautifulSoup

import pandas as pd

import time

# for tweets collection
import tweepy

# text cleaning
import preprocessor as p

# text preprocessing
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.corpus import wordnet

#lemmatization
from nltk.stem import WordNetLemmatizer

#sentiment analysis
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from nltk.tag import pos_tag

## visualization
import matplotlib.pyplot as plt
%matplotlib inline

# get the language of the text
import langdetect
#for ner recognition
import spacy

# GET FACEBOOK COMMENTS

In [2]:
# scroll down
def scroll():
    driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.END)
    time.sleep(3)
    pop_out_btn = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "popup_xout")))
    pop_out_btn.click()

In [3]:
# funtion to click the view more button
def view_more_click(driver):
    try: 
        view_more_comments = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "View more comments…")))
        view_more_comments.click()
    
    except TimeoutException:
        view_more_comments = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "View previous comments…")))
        view_more_comments.click()
    
    except (NoSuchElementException, ElementClickInterceptedException, StaleElementReferenceException) :
        print("Problem found")
    html = driver.page_source
    return html

# function to click the button 50 times
def view_more_comm(driver):
    i = 0
    while i < 50:
        html = view_more_click(driver)
        time.sleep(3)
        i += 1
    return html

In [4]:
# function to get a list of all the comment elements
def get_comments(html):
    soup = BeautifulSoup(html, 'html.parser')
    comment_section = soup.find("div", {"class":"_59e9 _1-ut _2a_g _34oh" })
    comment_els = comment_section.find_all("div", {"class":"_2a_i"})
    
    master_list = []
    for c in comment_els:
        data_dict = {}
        data_dict["User"] = c.find("div", {"class":"_2b05"}).text
        data_dict["Comment"] = c.find("div", {"data-sigil":"comment-body"}).text
        master_list.append(data_dict)
    return master_list

    
    

# GETTING TWITTER DATA

## Twitter Authentication

In [5]:
# read the keys from the file 
def authorize_twitter():
    keys = []
    with open('../Keys.txt') as f:
        for line in f:
            keys.append(line.strip())

    API_KEY = keys[1]
    API_KEY_SECRET = keys [4]
    ACCESS_TOKEN = keys[10]
    ACCESS_TOKEN_SECRET = keys[13]
    
    # initialize the api
    auth = tweepy.OAuthHandler(API_KEY, API_KEY_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    api = tweepy.API(auth)
    
    return (api)

## Get the replies from the posts

In [6]:
def get_replies(url):
    name = url.split("/")[-3]
    tweet_id = url.split("/")[-1]

    #empty list for the replies
    replies = []
    
    api = authorize_twitter()
    # get replies from the tweet
    for tweet in tweepy.Cursor(api.search_tweets,q='to:'+name, result_type='recent').items(1000):
        if hasattr(tweet, 'in_reply_to_status_id_str'):
            if (tweet.in_reply_to_status_id_str==tweet_id):
                replies.append(tweet)
    
    #master list to hold all the data needed
    master_list = []
    for reply in replies:
        data_dict = {}
        data_dict["User"] = reply.author.screen_name
        data_dict["Comment"] = reply.text

        master_list.append(data_dict)
        
    return (master_list)

# TEXT PREPROCESSING

## Tokenization and POS tagging + removing stop words

In [7]:
#perform tokenization and pos tagging and remove stop words
pos_dict = {'J': wordnet.ADJ, 'v': wordnet.VERB, 'N': wordnet.NOUN, 'R': wordnet.ADV}
def token_stop_pos(text):
    tags = pos_tag(word_tokenize(text))
    newlist = []
    
    for word, tag in tags:
        if word.lower() not in set(stopwords.words('english')):
            newlist.append(tuple([word, pos_dict.get(tag[0])]))
            
    return newlist

## Lemmatization

In [9]:
wordnet_lemmatizer = WordNetLemmatizer()
def lemmatize(pos_data):
    lemma_rew = " "
    for word, pos in pos_data:
        if not pos:
            lemma = word
            lemma_rew = lemma_rew + " " + lemma
        else:
            lemma = wordnet_lemmatizer.lemmatize(word, pos=pos)
            lemma_rew = lemma_rew + " " + lemma
    return lemma_rew


## SENTIMENT ANALYSIS USING VADER

In [10]:
def vaderSentimentAnalysis(text):
    analyzer = SentimentIntensityAnalyzer()
    vs= analyzer.polarity_scores(text)
    return vs['compound']

# function to analyse 
def vader_analysis(compound):
    if compound > 0:
        return 'Positive'
    elif compound < 0 :
        return 'Negative'
    else:
        return 'Neutral'

## Get names and parties from the text

In [11]:
def get_parties(text):
    for i in range(len(text)):
        parties=[]
        tags = text[i][1]
        if tags == "POLITICAL_PARTY":
            party = text[i][0]
            if party not in parties:
                parties.append(party)
        else:
            continue
        return parties

In [12]:
def get_names(text):
    names=[]
    for i in range(len(text)):
        tags = text[i][1]
        if tags == "POLITICIAN":
            name = text[i][0]
            if name not in names:
                names.append(name)
        else:
            continue
    return names

# MAIN FILE

In [14]:

choice = input("Please select the choice of your data. \n 1. for facebook. \n 2. For twitter")
if choice == "1":
    # install webdriver
    driver = webdriver.Chrome(ChromeDriverManager().install())
    url = input("Paste the Facebook mobile url: ")
    driver.get(url)
    scroll()

    print("Getting comments please wait...\n")
    html = view_more_comm(driver)

    master_list = get_comments(html)

    print(str(len(master_list)) + " Comments found")
        
else:
    url = input("Paste the tweet url: ")
    print("----Getting replies please wait------")
    master_list = get_replies(url)
    print(str(len(master_list)) + " Comments found")
        
    
    #create a dataframe
df = pd.DataFrame(master_list)
df["clean_tweets"] = df["Comment"].apply(lambda x: p.clean(x))
df

Please select the choice of your data. 
 1. for facebook. 
 2. For twitter1




Current google-chrome version is 99.0.4844
Get LATEST chromedriver version for 99.0.4844 google-chrome
Driver [C:\Users\NYAM\.wdm\drivers\chromedriver\win32\99.0.4844.51\chromedriver.exe] found in cache
  driver = webdriver.Chrome(ChromeDriverManager().install())


Paste the Facebook mobile url: https://mobile.facebook.com/NTVKenya/photos/a.80240814057/10157943885044058/?type=3&source=48
Getting comments please wait...

338 Comments found


Unnamed: 0,User,Comment,clean_tweets
0,Trojans War,At least you have confirmed to us that someone...,At least you have confirmed to us that someone...
1,Carole Otieno,Master of lies ....after raila impeached Uhuru...,Master of lies ....after raila impeached Uhuru...
2,Ochieng Owaga,Raila could not impeach Uhuru so that you bene...,Raila could not impeach Uhuru so that you bene...
3,Philemon Liech,Ruto can say anything now that his ill plans h...,Ruto can say anything now that his ill plans h...
4,Erick Moturi,Mr Promises is a dangerous kalenjinga. Can do ...,Mr Promises is a dangerous kalenjinga. Can do ...
...,...,...,...
333,Brayoo Wiseman,Ruto is confused,Ruto is confused
334,Njeri Anjeline,Stop that nonsense.,Stop that nonsense.
335,William Ouma,So Raila wanted to impeach Uhuru so that Ruto ...,So Raila wanted to impeach Uhuru so that Ruto ...
336,Zipsy Ndush,Philemon Liech kumbe unaenda muchene hivo,Philemon Liech kumbe unaenda muchene hivo


## Text Preprocessing

In [16]:
df['POS tagged'] = df['clean_tweets'].apply(lambda x: token_stop_pos(x))
df

Unnamed: 0,User,Comment,clean_tweets,POS tagged
0,Trojans War,At least you have confirmed to us that someone...,At least you have confirmed to us that someone...,"[(least, a), (confirmed, None), (us, None), (s..."
1,Carole Otieno,Master of lies ....after raila impeached Uhuru...,Master of lies ....after raila impeached Uhuru...,"[(Master, n), (lies, n), (...., None), (raila,..."
2,Ochieng Owaga,Raila could not impeach Uhuru so that you bene...,Raila could not impeach Uhuru so that you bene...,"[(Raila, n), (could, None), (impeach, None), (..."
3,Philemon Liech,Ruto can say anything now that his ill plans h...,Ruto can say anything now that his ill plans h...,"[(Ruto, n), (say, None), (anything, n), (ill, ..."
4,Erick Moturi,Mr Promises is a dangerous kalenjinga. Can do ...,Mr Promises is a dangerous kalenjinga. Can do ...,"[(Mr, n), (Promises, n), (dangerous, a), (kale..."
...,...,...,...,...
333,Brayoo Wiseman,Ruto is confused,Ruto is confused,"[(Ruto, n), (confused, None)]"
334,Njeri Anjeline,Stop that nonsense.,Stop that nonsense.,"[(Stop, None), (nonsense, n), (., None)]"
335,William Ouma,So Raila wanted to impeach Uhuru so that Ruto ...,So Raila wanted to impeach Uhuru so that Ruto ...,"[(Raila, n), (wanted, None), (impeach, None), ..."
336,Zipsy Ndush,Philemon Liech kumbe unaenda muchene hivo,Philemon Liech kumbe unaenda muchene hivo,"[(Philemon, n), (Liech, n), (kumbe, None), (un..."


In [17]:
# Lemmatization
df['Lemma'] = df['POS tagged'].apply(lambda x: lemmatize(x))
df.head()

Unnamed: 0,User,Comment,clean_tweets,POS tagged,Lemma
0,Trojans War,At least you have confirmed to us that someone...,At least you have confirmed to us that someone...,"[(least, a), (confirmed, None), (us, None), (s...",least confirmed us someone wanted impeach pr...
1,Carole Otieno,Master of lies ....after raila impeached Uhuru...,Master of lies ....after raila impeached Uhuru...,"[(Master, n), (lies, n), (...., None), (raila,...",Master lie .... raila impeached Uhuru benefi...
2,Ochieng Owaga,Raila could not impeach Uhuru so that you bene...,Raila could not impeach Uhuru so that you bene...,"[(Raila, n), (could, None), (impeach, None), (...",Raila could impeach Uhuru benefit ... 're fo...
3,Philemon Liech,Ruto can say anything now that his ill plans h...,Ruto can say anything now that his ill plans h...,"[(Ruto, n), (say, None), (anything, n), (ill, ...",Ruto say anything ill plan exposed .. danger...
4,Erick Moturi,Mr Promises is a dangerous kalenjinga. Can do ...,Mr Promises is a dangerous kalenjinga. Can do ...,"[(Mr, n), (Promises, n), (dangerous, a), (kale...",Mr Promises dangerous kalenjinga . anything ...


## Get only english text

In [19]:
df['language'] = df['clean_tweets'].apply(lambda x: langdetect.detect(x) if x.strip()!="" else "")
df = df[df['language']=='en'].reset_index(drop=True)
df.drop('language', axis=1)
df

Unnamed: 0,User,Comment,clean_tweets,POS tagged,Lemma,language
0,Trojans War,At least you have confirmed to us that someone...,At least you have confirmed to us that someone...,"[(least, a), (confirmed, None), (us, None), (s...",least confirmed us someone wanted impeach pr...,en
1,Carole Otieno,Master of lies ....after raila impeached Uhuru...,Master of lies ....after raila impeached Uhuru...,"[(Master, n), (lies, n), (...., None), (raila,...",Master lie .... raila impeached Uhuru benefi...,en
2,Ochieng Owaga,Raila could not impeach Uhuru so that you bene...,Raila could not impeach Uhuru so that you bene...,"[(Raila, n), (could, None), (impeach, None), (...",Raila could impeach Uhuru benefit ... 're fo...,en
3,Philemon Liech,Ruto can say anything now that his ill plans h...,Ruto can say anything now that his ill plans h...,"[(Ruto, n), (say, None), (anything, n), (ill, ...",Ruto say anything ill plan exposed .. danger...,en
4,Erick Moturi,Mr Promises is a dangerous kalenjinga. Can do ...,Mr Promises is a dangerous kalenjinga. Can do ...,"[(Mr, n), (Promises, n), (dangerous, a), (kale...",Mr Promises dangerous kalenjinga . anything ...,en
...,...,...,...,...,...,...
223,Jira Mwenyewe,For what gains,For what gains,"[(gains, n)]",gain,en
224,Nkhbitu Micheni,Mr promise say another promise,Mr promise say another promise,"[(Mr, n), (promise, n), (say, None), (another,...",Mr promise say another promise,en
225,Brayoo Wiseman,Ruto is confused,Ruto is confused,"[(Ruto, n), (confused, None)]",Ruto confused,en
226,Njeri Anjeline,Stop that nonsense.,Stop that nonsense.,"[(Stop, None), (nonsense, n), (., None)]",Stop nonsense .,en


## Geta clean dataframe for text analysis

In [20]:
fin_data= pd.DataFrame(df[['Comment', 'Lemma']])

## Sentiment Analysis

In [26]:
fin_data['Vader_Sentiment'] = fin_data['Comment'].apply(lambda x: vaderSentimentAnalysis(x))
fin_data['Vader_Analysis'] = fin_data['Vader_Sentiment'].apply(lambda x: vader_analysis(x))
fin_data[1:50]

Unnamed: 0,Comment,Lemma,Vader_Sentiment,Vader_Analysis
1,Master of lies ....after raila impeached Uhuru...,Master lie .... raila impeached Uhuru benefi...,0.0516,Positive
2,Raila could not impeach Uhuru so that you bene...,Raila could impeach Uhuru benefit ... 're fo...,0.3875,Positive
3,Ruto can say anything now that his ill plans h...,Ruto say anything ill plan exposed .. danger...,-0.7351,Negative
4,Mr Promises is a dangerous kalenjinga. Can do ...,Mr Promises dangerous kalenjinga . anything ...,-0.8316,Negative
5,You still have my vote...all i wish is you had...,still vote ... wish succeeded impeachment .....,0.25,Positive
6,"If so that would have happened in 2013-17,but ...","would happened -17 , question ? ? Yet tyrann...",0.3094,Positive
7,Kumbe ni Ukweli! But who would have constituti...,Kumbe ni Ukweli ! would constitutionally ben...,0.0,Neutral
8,😂 Uhuru could not support Raila if not of the ...,Uhuru could support Raila hurtful betrayal M...,-0.617,Negative
9,If Roto wanted to impeach Uhuru he could overt...,Roto wanted impeach Uhuru could overthrow ti...,0.0,Neutral
10,this is a confirmation that Uhuru is right.Rut...,confirmation Uhuru right.Ruto ill plan .,-0.4215,Negative


In [27]:
vd_counts = fin_data["Vader_Analysis"].value_counts()
vd_counts

Positive    84
Negative    73
Neutral     71
Name: Vader_Analysis, dtype: int64

## Named Entity Recognition

In [29]:
nlp = spacy.load("political_ner_model")
fin_data["tags"] = df["clean_tweets"].apply(lambda x: [(tag.text, tag.label_) for tag in nlp(x).ents])
fin_data[1:50]

Unnamed: 0,Comment,Lemma,Vader_Sentiment,Vader_Analysis,tags
1,Master of lies ....after raila impeached Uhuru...,Master lie .... raila impeached Uhuru benefi...,0.0516,Positive,"[(Uhuru, POLITICIAN)]"
2,Raila could not impeach Uhuru so that you bene...,Raila could impeach Uhuru benefit ... 're fo...,0.3875,Positive,"[(Raila, POLITICIAN), (Uhuru, POLITICIAN)]"
3,Ruto can say anything now that his ill plans h...,Ruto say anything ill plan exposed .. danger...,-0.7351,Negative,"[(Ruto, POLITICIAN)]"
4,Mr Promises is a dangerous kalenjinga. Can do ...,Mr Promises dangerous kalenjinga . anything ...,-0.8316,Negative,[]
5,You still have my vote...all i wish is you had...,still vote ... wish succeeded impeachment .....,0.25,Positive,"[(Uhuru, POLITICIAN)]"
6,"If so that would have happened in 2013-17,but ...","would happened -17 , question ? ? Yet tyrann...",0.3094,Positive,"[(Jubilee, POLITICAL_PARTY)]"
7,Kumbe ni Ukweli! But who would have constituti...,Kumbe ni Ukweli ! would constitutionally ben...,0.0,Neutral,"[(Uhuru, POLITICIAN)]"
8,😂 Uhuru could not support Raila if not of the ...,Uhuru could support Raila hurtful betrayal M...,-0.617,Negative,"[(Uhuru, POLITICIAN), (Raila, POLITICIAN), (Mr..."
9,If Roto wanted to impeach Uhuru he could overt...,Roto wanted impeach Uhuru could overthrow ti...,0.0,Neutral,"[(Uhuru, POLITICIAN)]"
10,this is a confirmation that Uhuru is right.Rut...,confirmation Uhuru right.Ruto ill plan .,-0.4215,Negative,"[(Uhuru, POLITICIAN), (Ruto, POLITICIAN)]"


## Get names and political parties

In [30]:
fin_data["Parties"] = fin_data["tags"].apply(lambda x: get_parties(x))
fin_data["Politicians"] = fin_data["tags"].apply(lambda x: get_names(x))
fin_data[50:100]

Unnamed: 0,Comment,Lemma,Vader_Sentiment,Vader_Analysis,tags,Parties,Politicians
50,"Tell us more the three of you know something,c...","Tell us three know something , confess waiti...",0.0,Neutral,[],,[]
51,"2 b a Ruto supporter, ni kazi, jana, they all ...","b Ruto supporter , ni kazi , jana , agreed R...",0.4939,Positive,"[(Ruto, POLITICIAN), (Ruto, POLITICIAN), (Uhur...",,"[Ruto, Uhuru, Raila]"
52,"Uhuru is so confused by now,his lies are not a...","Uhuru confused , lie adding anymore.Ruto th",-0.6995,Negative,"[(Uhuru, POLITICIAN), (Ruto, POLITICIAN)]",,"[Uhuru, Ruto]"
53,Wish it could succeed but 9th it will succeed,Wish could succeed th succeed,0.8047,Positive,[],,[]
54,People urgue as if Kenya is a Kingdom. We neve...,People urgue Kenya Kingdom . never learned C...,0.0,Neutral,[],,[]
55,"Uhuru you are confused, rem the time of ICC yo...","Uhuru confused , rem time ICC give power giv...",-0.1655,Negative,"[(Uhuru, POLITICIAN)]",,[Uhuru]
56,Uhuru knows it was you period other excuses ji...,Uhuru knows period excuse jiwekee,0.0,Neutral,"[(Uhuru, POLITICIAN)]",,[Uhuru]
57,"Hahaha,that's my Fifth president!!!by the way ...","Hahaha , 's Fifth president ! ! ! way maybe ...",0.0,Neutral,"[(Raila, POLITICIAN)]",,[Raila]
58,Shifting your burden to other people won't giv...,Shifting burden people wo n't give sympathy ...,-0.6136,Negative,[],,[]
59,😛😛😛😛😛😛mmmmm. Seems there is truth in this impe...,mmmmm . Seems truth impeachment thing ! ! co...,0.6103,Positive,"[(Raila, POLITICIAN), (Uhuru, POLITICIAN)]",,"[Raila, Uhuru]"
