## Libraries

In [None]:
# Mount Google Drive at /content/gdrive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Upgrade pip itself in the runtime.
!pip install --upgrade pip

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pip
  Downloading pip-22.3.1-py3-none-any.whl (2.1 MB)
[K     |████████████████████████████████| 2.1 MB 4.3 MB/s 
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 21.1.3
    Uninstalling pip-21.1.3:
      Successfully uninstalled pip-21.1.3
Successfully installed pip-22.3.1


In [None]:
# Install the third-party packages: snscrape (scraping), pandas/scipy (data handling),
# transformers + sentencepiece (the sentiment model and its tokenizer).
# NOTE(review): `transformers` is listed both plain and with the `[sentencepiece]` extra;
# the second form alone should cover both — confirm before trimming.
# NOTE(review): nltk is not referenced in the cells visible here — verify it is still needed.
!pip install snscrape pandas nltk scipy sentencepiece transformers transformers[sentencepiece]

In [None]:
# Print the installed packages and their versions.
!pip list

## Functions

In [None]:
import snscrape.modules.twitter as sntwitter
import pandas as pd

In [None]:
def is_news_outlet(user):
  """Heuristically decide whether a Twitter account belongs to a news/media outlet.

  The account's username, display name and description are lower-cased and
  searched for any of a fixed list of media-related phrases.

  Args:
    user: object exposing ``username``, ``displayname`` and ``description``
      attributes. Any of them may be missing or ``None``.

  Returns:
    bool: True if at least one phrase occurs in one of the three fields.
  """
  check_phrases = ('news', 'television', 'newspaper', 'print', 'radyo', 'radio',
                   'publication', 'broadsheet', 'broadcast', 'media', 'daily')

  # Profile fields can be None (e.g. an empty bio); skip those instead of
  # letting string concatenation raise a TypeError.
  fields = (getattr(user, attr, None) for attr in ('username', 'displayname', 'description'))

  # Join with a space so a phrase can never match across two adjacent fields
  # (e.g. username "prin" + display name "tania" must not match "print").
  user_details = ' '.join(field for field in fields if field).lower()

  return any(phrase in user_details for phrase in check_phrases)

In [None]:
def scrape_tweets(query, limit):
  """Collect tweets matching a search query, skipping accounts that look like news outlets.

  Args:
    query: Twitter search string passed to ``sntwitter.TwitterSearchScraper``.
    limit: number of tweets to keep. Tweets rejected by ``is_news_outlet`` do
      not count towards it.

  Returns:
    pandas.DataFrame with columns ``Date``, ``User`` and ``Tweet``, one row per
    kept tweet, in the order the scraper yielded them.
  """
  columns = ['Date', 'User', 'Tweet']
  tweets = []

  # Nothing requested: return the empty frame without touching the scraper.
  if limit == 0:
    return pd.DataFrame(tweets, columns=columns)

  for tweet in sntwitter.TwitterSearchScraper(query).get_items():
    if is_news_outlet(tweet.user):
      continue

    tweets.append([tweet.date, tweet.user.username, tweet.content])

    # Check the quota right after appending so the scraper is not asked for
    # one more result that would be thrown away.
    if len(tweets) == limit:
      break

  return pd.DataFrame(tweets, columns=columns)

In [None]:
# Folder on the mounted Drive that the CSV reads/writes in this notebook are prefixed with.
# NOTE(review): the name `dir` shadows Python's built-in dir(); renaming it would
# require updating every cell that uses it.
dir = '/content/gdrive/MyDrive/textmining/'

## Scrape data

In [None]:
# start of 2021 until Marcos files candidacy
# `until:` is exclusive, so the bound must be 2021-10-06 to keep tweets posted on
# 2021-10-05; the following window begins at since:2021-10-06, so with the old
# bound (2021-10-05) that day belonged to neither period.
q1 = "(bongbong AND marcos OR bongbongmarcos) until:2021-10-06 since:2021-01-01"
df1 = scrape_tweets(q1, 5000)
df1

Unnamed: 0,Date,User,Tweet
0,2021-10-04 23:23:26+00:00,adprlaw1987,"So, what will it be? From the looks of it, and..."
1,2021-10-04 22:18:15+00:00,gero_rgo,The Greatest Lesson Bongbong Marcos Learned Fr...
2,2021-10-04 21:36:29+00:00,renpos2,@inquirerdotnet Question: Has the Marcos famil...
3,2021-10-04 21:12:36+00:00,ronamgavin12,@bongbongmarcos the OG VP Bongbong…\nsana hind...
4,2021-10-04 20:35:16+00:00,rodev10,@TVPatrol Delicadeza naman for Bongbong Marcos...


In [None]:
# from Marcos filing candidacy to being declared president
# NOTE(review): Twitter's `until:` bound is exclusive, so the newest tweets in this
# window are from 2022-05-23 — confirm that is the intended cut-off.
q2 = "(bongbong AND marcos OR bongbongmarcos) until:2022-05-24 since:2021-10-06"
# Keep at most 5000 tweets from accounts not flagged as news outlets.
df2 = scrape_tweets(q2, 5000)
df2

Unnamed: 0,Date,User,Tweet
0,2022-05-23 23:54:48+00:00,2TradeAsia,Balisacan is BBM's Socioeconomic Planning chie...
1,2022-05-23 23:41:29+00:00,AmigoManila,MORE coverage of President-elect Bongbong Marc...
2,2022-05-23 23:28:28+00:00,OnePonceEnrile,Bongbong Marcos sucks ass!\n#NeverForget
3,2022-05-23 23:23:34+00:00,radharuss,Bongbong Marcos’ ‘admittance’ of a troll army ...
4,2022-05-23 23:21:18+00:00,nixonapun,@laarni1224 That's correct. Mandadaya na lang ...


In [None]:
# from Marcos taking office to current
# NOTE(review): "current" is the fixed date in the query; because `until:` is
# exclusive the newest tweets are from 2022-11-29 — confirm the intended cut-off.
q3 = "(bongbong AND marcos OR bongbongmarcos) until:2022-11-30 since:2022-06-30"
# Keep at most 5000 tweets from accounts not flagged as news outlets.
df3 = scrape_tweets(q3, 5000)
df3

Unnamed: 0,Date,User,Tweet
0,2022-11-29 23:14:39+00:00,Herr_Oberst45,@bongbongmarcos ULOL MO BONGBONG
1,2022-11-29 20:37:31+00:00,JustMe80041125,@RockyMa46760498 @iMPACTPH2019 @PinoyAkoBlog y...
2,2022-11-29 20:33:05+00:00,JustMe80041125,@ABSCBNNews namimili o wala pang napipisil na ...
3,2022-11-29 20:09:15+00:00,AlternIligan,@loveOfCountry7 @pizzapmore987 @seanngalvin @i...
4,2022-11-29 19:11:12+00:00,ChristineEliaz,Editorial: Bongbong’s war on drugs #Philippine...


In [None]:
keywords = "(bongbong AND marcos OR bongbongmarcos) "
# (since, until) pairs, one per sampled month. The last pair previously started
# at 2022-10-01, duplicating October inside the November window.
# NOTE(review): `until:` is exclusive, so the final day of each month is not
# covered by these bounds — confirm whether that is intended.
dates = [('2021-10-01', '2021-10-31'), ('2022-02-01', '2022-02-28'), ('2022-03-01', '2022-03-31'),
 ('2022-05-01', '2022-05-31'), ('2022-06-01', '2022-06-30'), ('2022-08-01', '2022-08-31'),
 ('2022-09-01', '2022-09-30'), ('2022-10-01', '2022-10-31'), ('2022-11-01', '2022-11-30')]

# Scrape each window into its own frame and concatenate once at the end rather
# than growing a DataFrame inside the loop.
monthly_frames = []

for date in dates:
  date_range = f"until:{date[1]} since:{date[0]}"
  monthly_frames.append(scrape_tweets(keywords+date_range, 1))

# ignore_index gives a unique 0..n-1 index; without it every row keeps the
# index of its source frame (all zeros when one tweet is taken per window).
df4 = pd.concat(monthly_frames, axis=0, ignore_index=True)

df4

In [None]:
# Persist the scraped tweets to Drive. index=False keeps the positional index out
# of the file; otherwise it comes back as a meaningless "Unnamed: 0" column when
# the CSV is read again.
df1.to_csv(dir+'before.csv', index=False)
df2.to_csv(dir+'during.csv', index=False)
df3.to_csv(dir+'after.csv', index=False)
df4.to_csv(dir+'monthly.csv', index=False)

## Load XLM-T

In [None]:
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
import numpy as np
from scipy.special import softmax

In [None]:
def preprocess(text):
    """Replace user mentions and links with fixed placeholders.

    The text is split on single spaces. Every piece that starts with '@' and
    has at least one more character becomes '@user'; every piece that starts
    with 'http' becomes 'http'. All other pieces are kept unchanged and the
    pieces are re-joined with single spaces.
    """
    def _mask(token):
        if len(token) > 1 and token.startswith('@'):
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        return token

    return " ".join(_mask(piece) for piece in text.split(" "))

In [None]:
# Hugging Face hub id of the multilingual Twitter sentiment model (XLM-T).
MODEL = "cardiffnlp/twitter-xlm-roberta-base-sentiment"

tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)

model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# save_pretrained(MODEL) writes to a local directory whose relative path equals
# the hub id, and from_pretrained(MODEL) resolves to that directory once it
# exists. Save the tokenizer next to the model so re-running this cell in the
# same runtime does not fail on a directory that has weights but no tokenizer files.
model.save_pretrained(MODEL)
tokenizer.save_pretrained(MODEL)

Downloading:   0%|          | 0.00/841 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

## Apply sentiment analysis to scraped data


In [None]:
# Read the four scraped tweet sets back from Drive (the files written in the "Scrape data" section).
df1 = pd.read_csv(dir+'before.csv')
df2 = pd.read_csv(dir+'during.csv')
df3 = pd.read_csv(dir+'after.csv')
df4 = pd.read_csv(dir+'monthly.csv')

In [None]:
def df_sentiment_analysis(df):
  """Label every tweet in ``df`` with the model's top sentiment class.

  Uses the notebook-level ``preprocess``, ``tokenizer``, ``model`` and
  ``config`` objects.

  Args:
    df: pandas.DataFrame with a ``Tweet`` column of strings.

  Returns:
    The same DataFrame object, with a ``Sentiment`` column added (or
    overwritten) holding ``config.id2label`` of the highest-scoring class.
  """
  labels = []

  # Iterate over the column positionally: label-based access (iterrows + df.at)
  # breaks when the frame's index contains duplicates.
  for tweet in df['Tweet']:
    text = preprocess(tweet)
    # Truncate explicitly so an unusually long input cannot exceed the
    # model's maximum sequence length and raise inside the forward pass.
    encoded_input = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    output = model(**encoded_input)
    scores = output[0][0].detach().numpy()
    # Only the top class is needed; softmax is monotonic, so the argmax of the
    # raw scores is the same class the softmax + argsort ranking would put first.
    labels.append(config.id2label[int(np.argmax(scores))])

  df['Sentiment'] = labels
  return df

In [None]:
# Add a Sentiment label to every tweet loaded from before.csv and display the frame.
df1 = df_sentiment_analysis(df1)
df1

Unnamed: 0.1,Unnamed: 0,Date,User,Tweet,Sentiment
0,0,2021-10-04 23:23:26+00:00,adprlaw1987,"So, what will it be? From the looks of it, and...",neutral
1,1,2021-10-04 22:18:15+00:00,gero_rgo,The Greatest Lesson Bongbong Marcos Learned Fr...,positive
2,2,2021-10-04 21:36:29+00:00,renpos2,@inquirerdotnet Question: Has the Marcos famil...,negative
3,3,2021-10-04 21:12:36+00:00,ronamgavin12,@bongbongmarcos the OG VP Bongbong…\nsana hind...,positive
4,4,2021-10-04 20:35:16+00:00,rodev10,@TVPatrol Delicadeza naman for Bongbong Marcos...,negative


In [None]:
# Add a Sentiment label to every tweet loaded from during.csv and display the frame.
df2 = df_sentiment_analysis(df2)
df2

Unnamed: 0.1,Unnamed: 0,Date,User,Tweet,Sentiment
0,0,2022-05-23 23:54:48+00:00,2TradeAsia,Balisacan is BBM's Socioeconomic Planning chie...,neutral
1,1,2022-05-23 23:41:29+00:00,AmigoManila,MORE coverage of President-elect Bongbong Marc...,positive
2,2,2022-05-23 23:28:28+00:00,OnePonceEnrile,Bongbong Marcos sucks ass!\n#NeverForget,negative
3,3,2022-05-23 23:23:34+00:00,radharuss,Bongbong Marcos’ ‘admittance’ of a troll army ...,negative
4,4,2022-05-23 23:21:18+00:00,nixonapun,@laarni1224 That's correct. Mandadaya na lang ...,neutral


In [None]:
# Add a Sentiment label to every tweet loaded from after.csv and display the frame.
df3 = df_sentiment_analysis(df3)
df3

Unnamed: 0.1,Unnamed: 0,Date,User,Tweet,Sentiment
0,0,2022-11-29 23:14:39+00:00,Herr_Oberst45,@bongbongmarcos ULOL MO BONGBONG,positive
1,1,2022-11-29 20:37:31+00:00,JustMe80041125,@RockyMa46760498 @iMPACTPH2019 @PinoyAkoBlog y...,negative
2,2,2022-11-29 20:33:05+00:00,JustMe80041125,@ABSCBNNews namimili o wala pang napipisil na ...,negative
3,3,2022-11-29 20:09:15+00:00,AlternIligan,@loveOfCountry7 @pizzapmore987 @seanngalvin @i...,negative
4,4,2022-11-29 19:11:12+00:00,ChristineEliaz,Editorial: Bongbong’s war on drugs #Philippine...,neutral


In [None]:
# Add a Sentiment label to every tweet loaded from monthly.csv and display the frame.
df4 = df_sentiment_analysis(df4)
df4

Unnamed: 0.1,Unnamed: 0,Date,User,Tweet,Sentiment
0,0,2021-10-30 23:47:31+00:00,rhian_janinne,Mga chismosa dto sbe wla dw magagawa si bongbo...,negative
1,0,2022-02-27 23:59:05+00:00,WinwinEklabu,What do you want atty the mind conditioning of...,negative
2,0,2022-03-30 23:57:50+00:00,gelmesse,Kausap ko kaibigan ko ngyon taga BILIRAN.. tin...,neutral
3,0,2022-05-30 23:43:20+00:00,MakoyPalaganas,France Ambassador to the Philippines Michèle B...,neutral
4,0,2022-06-29 23:58:34+00:00,enriquietto1,"I would like to have, also, all the cases of t...",negative
5,0,2022-08-30 17:31:11+00:00,GlobalGamingBiz,Philippine President Ferdinand “Bongbong” Marc...,neutral
6,0,2022-09-29 23:58:15+00:00,narniapevensie9,"BONGBONG MARCOS, KATRINA VELARDE, TONI GONZAGA...",negative
7,0,2022-10-30 22:30:08+00:00,nixonapun,@cnnphilippines Di ba uso sa Team Bongbong Mar...,neutral
8,0,2022-11-29 23:14:39+00:00,Herr_Oberst45,@bongbongmarcos ULOL MO BONGBONG,positive


In [None]:
# Persist the labelled tweets to Drive. index=False keeps the positional index out
# of the file so it does not reappear as an extra "Unnamed: ..." column on reload.
df1.to_csv(dir+'before_wsentiment.csv', index=False)
df2.to_csv(dir+'during_wsentiment.csv', index=False)
df3.to_csv(dir+'after_wsentiment.csv', index=False)
df4.to_csv(dir+'monthly_sentiments.csv', index=False)

## Monthly sentiment

In [None]:
# Read the labelled monthly sample back from monthly_sentiments.csv and display it.
df4 = pd.read_csv(dir+'monthly_sentiments.csv')
df4

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Date,User,Tweet,Sentiment
0,0,0,2021-10-30 23:47:31+00:00,rhian_janinne,Mga chismosa dto sbe wla dw magagawa si bongbo...,negative
1,1,0,2022-02-27 23:59:05+00:00,WinwinEklabu,What do you want atty the mind conditioning of...,negative
2,2,0,2022-03-30 23:57:50+00:00,gelmesse,Kausap ko kaibigan ko ngyon taga BILIRAN.. tin...,neutral
3,3,0,2022-05-30 23:43:20+00:00,MakoyPalaganas,France Ambassador to the Philippines Michèle B...,neutral
4,4,0,2022-06-29 23:58:34+00:00,enriquietto1,"I would like to have, also, all the cases of t...",negative
5,5,0,2022-08-30 17:31:11+00:00,GlobalGamingBiz,Philippine President Ferdinand “Bongbong” Marc...,neutral
6,6,0,2022-09-29 23:58:15+00:00,narniapevensie9,"BONGBONG MARCOS, KATRINA VELARDE, TONI GONZAGA...",negative
7,7,0,2022-10-30 22:30:08+00:00,nixonapun,@cnnphilippines Di ba uso sa Team Bongbong Mar...,neutral
8,8,0,2022-11-29 23:14:39+00:00,Herr_Oberst45,@bongbongmarcos ULOL MO BONGBONG,positive


In [None]:
# Parse the timestamp strings in a single vectorised call instead of one
# pd.to_datetime call per row. utc=True normalises any offset to UTC so the
# column always gets a datetime dtype and the .dt accessor below is available.
df4["Date"] = pd.to_datetime(df4["Date"], utc=True)
# Calendar month and year of each tweet, used as grouping keys.
df4["Month"] = df4["Date"].dt.month
df4["Year"] = df4["Date"].dt.year

In [None]:
# Number of tweets for every (Sentiment, Month, Year) combination.
group_keys = ["Sentiment", "Month", "Year"]
df4 = df4.groupby(group_keys)["Tweet"].count()
df4.head()

Sentiment  Month  Year
negative   2      2022    1
           6      2022    1
           9      2022    1
           10     2021    1
neutral    3      2022    1
Name: Tweet, dtype: int64

In [None]:
# Save the grouped counts (df4 is the groupby result at this point, not the per-tweet frame).
# NOTE(review): this writes to the same monthly_sentiments.csv that the first cell of
# this section reads, replacing the per-tweet rows; re-running that cell afterwards
# would load the aggregated table, which has no Date column. Consider a separate
# file name — confirm no downstream consumer depends on the current one.
df4.to_csv(dir+'monthly_sentiments.csv')