In [2]:
import nltk
import warnings
warnings.filterwarnings('ignore')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Download the 'vader_lexicon' NLTK data package (the VADER word list).
nltk.download('vader_lexicon')

# Single shared analyzer instance: later cells replace `sia.lexicon` with a
# merged lexicon and call `sia.polarity_scores(...)` on scraped article text.
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...


Updating the VADER lexicon with external lexicons

In [4]:
import csv
import pandas as pd

# Stock-market lexicon.
# Each entry's sentiment is the mean of its Aff_Score and Neg_Score columns.
stock_lex_df = pd.read_csv('lexicon_data/stock_lex.csv')
stock_lex_df['sentiment'] = (stock_lex_df['Aff_Score'] + stock_lex_df['Neg_Score']) / 2

# Keep single-word entries only (anything containing a space is dropped).
# The isinstance check skips cells that pandas parsed as NaN (a float, which
# has no .split) instead of crashing on them.
stock_lex = {
    k: v
    for k, v in zip(stock_lex_df.Item, stock_lex_df.sentiment)
    if isinstance(k, str) and len(k.split(' ')) == 1
}

# Extremes are loop-invariant: compute them once instead of once per entry.
max_score = max(stock_lex.values(), default=0.0)
min_score = min(stock_lex.values(), default=0.0)

# Rescale so the most positive entry maps to +4 and the most negative to -4.
stock_lex_scaled = {}
for k, v in stock_lex.items():
    if v > 0:
        stock_lex_scaled[k] = v / max_score * 4
    elif min_score:
        stock_lex_scaled[k] = v / min_score * -4
    else:
        # No negative scores at all, so this entry is a zero score:
        # keep it neutral rather than dividing by zero.
        stock_lex_scaled[k] = 0.0
        
# Loughran and McDonald word lists (first CSV column of each file).
# NOTE(review): VADER appears to lowercase tokens before lexicon lookup; if
# these files store words in upper case they would never match -- confirm.

# Positive list: each non-empty first-column value is kept as one entry.
positive = []
with open('lexicon_data/lm_positive.csv', 'r', newline='') as f:
    for row in csv.reader(f):
        # csv.reader yields [] for blank lines; skip those and empty cells.
        if row and row[0].strip():
            positive.append(row[0].strip())

# Negative list: multi-word entries are split and every word is added.
negative = []
with open('lexicon_data/lm_negative.csv', 'r', newline='') as f:
    for row in csv.reader(f):
        if row:
            # extend() with the list of words handles one-word and multi-word
            # entries alike. (Extending with the bare string would add its
            # individual characters instead of the word.) split() with no
            # argument also drops empty tokens from repeated spaces.
            negative.extend(row[0].split())
            
# Merge all lexicons into one mapping. Sources listed later override earlier
# ones on duplicate words, so the analyzer's own entries (applied last) win
# over the stock lexicon, which in turn wins over the fixed +/-2.0 word lists.
final_lex = {
    **dict.fromkeys(positive, 2.0),
    **dict.fromkeys(negative, -2.0),
    **stock_lex_scaled,
    **sia.lexicon,
}

# Install the merged lexicon on the shared analyzer.
sia.lexicon = final_lex

Scraping Business Times articles about Facebook

In [None]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import time
import pprint

# Scrape configuration.
SEARCH_URL = 'https://www.businesstimes.com.sg/search/facebook?page='
N_PAGES = 10          # search-result pages 1..N_PAGES are visited
REQUEST_DELAY_S = 1   # seconds to pause before each article request


def _fetch_html(url):
    """Return the raw response body of `url`, closing the connection afterwards."""
    with urlopen(url) as response:
        return response.read()


def _article_text(html):
    """Return the text of every <p> element in `html`, joined with spaces.

    Joining with a space keeps the last word of one paragraph from fusing
    with the first word of the next into a single unmatched token.
    """
    article_soup = BeautifulSoup(html, features='html.parser')
    return " ".join(paragraph.text for paragraph in article_soup.find_all("p"))


# Maps the date string shown on the search page to the list of compound
# scores of the articles listed under that date.
date_sentiments = {}

for i in range(1, N_PAGES + 1):
    page = _fetch_html(SEARCH_URL + str(i))
    soup = BeautifulSoup(page, features='html.parser')
    posts = soup.find_all("div", {"class": "media-body"})
    for post in posts:
        # Skip result items that lack a link or a date instead of crashing.
        if post.a is None or post.time is None:
            continue
        time.sleep(REQUEST_DELAY_S)  # delay execution between requests
        url = post.a['href']
        date = post.time.text.strip()
        # list out all results scraped from website
        print(date, url)
        try:
            link_page = _fetch_html(url)
        except (OSError, ValueError):
            # URLError/HTTPError are OSError subclasses; ValueError covers
            # malformed URLs. Retry once without the last two characters.
            # NOTE(review): presumably some result links carry a two-character
            # suffix the article page rejects -- confirm.
            url = url[:-2]
            try:
                link_page = _fetch_html(url)
            except (OSError, ValueError) as err:
                # One unreachable article should not abort the whole scrape.
                print('skipped', url, err)
                continue
        passage = _article_text(link_page)
        sentiment = sia.polarity_scores(passage)['compound']
        date_sentiments.setdefault(date, []).append(sentiment)

# Mean compound score per day, rounded to 3 decimals and keyed by the day
# AFTER the publication date (each date is shifted forward by one day).
date_sentiment = {
    datetime.strptime(day, '%d %b %Y').date() + timedelta(days=1):
        round(sum(scores) / float(len(scores)), 3)
    for day, scores in date_sentiments.items()
}

earliest_date = min(date_sentiment.keys())

# show the per-day average sentiment scores
print(date_sentiment)

10 Jan 2020 https://www.businesstimes.com.sg/technology/lawmakers-facebooks-steps-to-tackle-deepfake-videos-inadequate
08 Jan 2020 https://www.businesstimes.com.sg/consumer/facebooks-new-singapore-team-to-serve-china-advertising-business
08 Jan 2020 https://www.businesstimes.com.sg/technology/facebook-tightening-policy-against-deepfakes
07 Jan 2020 https://www.businesstimes.com.sg/government-economy/trump-says-zuckerberg-told-him-hes-facebooks-number-one
28 Dec 2019 https://www.businesstimes.com.sg/banking-finance/facebooks-libra-has-failed-in-current-form-says-swiss-president
23 Dec 2019 https://www.businesstimes.com.sg/technology/data-on-more-than-250m-us-facebook-users-exposed-online
21 Dec 2019 https://www.businesstimes.com.sg/technology/twitter-facebook-target-state-linked-accounts-made-to-manipulate
20 Dec 2019 https://www.businesstimes.com.sg/garage/facebook-buys-startup-to-build-live-shopping-feature-source
20 Dec 2019 https://www.businesstimes.com.sg/technology/facebook-says-i

04 Oct 2019 https://www.businesstimes.com.sg/technology/us-allies-urge-facebook-not-to-encrypt-messages-as-they-fight-child-abuse-terrorism
03 Oct 2019 https://www.businesstimes.com.sg/technology/facebook-may-have-to-censor-online-hate-globally-eu-judges-say
02 Oct 2019 https://www.businesstimes.com.sg/technology/facebook-to-pay-subset-of-news-tab-publishers
01 Oct 2019 https://www.businesstimes.com.sg/technology/ahead-of-2020-facebook-falls-short-on-plan-to-share-data-on-disinformation
30 Sep 2019 https://www.businesstimes.com.sg/banking-finance/regulatory-concerns-may-delay-launch-of-facebooks-libra-exec
30 Sep 2019 https://www.businesstimes.com.sg/technology/facebook-whatsapp-will-have-to-share-messages-with-uk-police
27 Sep 2019 https://www.businesstimes.com.sg/technology/facebook-tests-hiding-likes-on-social-media-posts
26 Sep 2019 https://www.businesstimes.com.sg/technology/facebook-unveils-virtual-social-space-for-its-oculus-users
24 Sep 2019 https://www.businesstimes.com.sg/tec