In [37]:
from textblob import TextBlob as TB, Word
from nltk.corpus import stopwords
import nltk
# import tensorflow_datasets as tfds
# import tensorflow_hub as tfhub
# from tensorflow import keras
# import tensorflow as tf
import pandas as pd
import tweepy as ty
import numpy as np
import re
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [38]:
twtr_info = pd.read_csv('keys_tokens.csv')
type(twtr_info)

pandas.core.frame.DataFrame

In [39]:
consumer_key = twtr_info['API Key'][0]
consumer_secret = twtr_info['API Key Secret'][0]
access_token = twtr_info['Access Token'][0]
access_token_secret = twtr_info['Access Token Secret'][0]

In [40]:
auth = ty.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = ty.API(auth, wait_on_rate_limit = True)

In [41]:
news_sources = ['nytimes', 'WSJ', 'FoxNews', 'CNN', 'BBC', 'AlJazeera']
twitter_handle = news_sources[0]

In [42]:
query = ty.Cursor(api.user_timeline, screen_name=twitter_handle, tweet_mode='extended', lang='en').items(5)

In [43]:
# organize tweets
tweet_text, date_posted = [], []
for tweet in query:
    tweet_text.append(tweet.full_text)
    date_posted.append(tweet.created_at)

Unexpected parameter: lang


In [44]:
# create dataframe using tweet_text & date_posted
tweets = pd.DataFrame()
tweet_column = 'Tweets from ' + twitter_handle
tweets.insert(loc=0, column=tweet_column, value=tweet_text)
tweets.index = date_posted
tweets.reset_index(inplace=True)
tweets

Unnamed: 0,index,Tweets from nytimes
0,2022-01-08 23:00:13+00:00,Here’s what you need to know from @wirecutter ...
1,2022-01-08 22:40:02+00:00,"Skiing is an endangered sport, caught between ..."
2,2022-01-08 22:34:56+00:00,"Mariah Bell, who at 25 won her first U.S. nati..."
3,2022-01-08 22:25:02+00:00,"The Emmy-winning HBO teen drama “Euphoria,” st..."
4,2022-01-08 22:23:43+00:00,RT @carolrosenberg: Latest: The Navy base at G...


In [45]:
# get common words from nltk.corpus
nltk.download('vader_lexicon')
nltk.download('wordnet')
nltk.download('stopwords')
common_words = stopwords.words('english')  # list of common words

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/hamzarashid/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/hamzarashid/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/hamzarashid/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [46]:
def clean_tweet(text):
    tweet = text
    to_replace = ['@[\w]+', 'RT[\s]+', '[^\s\w]', '#', 'http[\w]+']

    # remove keyword, @ mentions, RTs, ...
    for character_sequence in to_replace:
        tweet = re.sub(character_sequence, '', tweet)

    # remove words that have no impact on sentiment measure
    tweet = ' '.join(word for word in tweet.split() if word not in common_words)

    # turn words into most basic form
    tweet = ' '.join(Word(word).lemmatize() for word in tweet.split())

    return tweet

In [47]:
tweets[tweet_column] = tweets[tweet_column].apply(clean_tweet)
tweets

Unnamed: 0,index,Tweets from nytimes
0,2022-01-08 23:00:13+00:00,Heres need know finding using rapid antigen te...
1,2022-01-08 22:40:02+00:00,Skiing endangered sport caught warming planet ...
2,2022-01-08 22:34:56+00:00,Mariah Bell 25 first US national figure skatin...
3,2022-01-08 22:25:02+00:00,The Emmywinning HBO teen drama Euphoria starri...
4,2022-01-08 22:23:43+00:00,Latest The Navy base Guantanamo Bay reached pa...


In [48]:
tweet_list = list(tweets[tweet_column])
tweet_list

['Heres need know finding using rapid antigen test Covid19 useful though foolproof way protect others infection',
 'Skiing endangered sport caught warming planet global pandemic But ski touring uphill skiing growing popularity Heres',
 'Mariah Bell 25 first US national figure skating championship Friday earned spot Beijing Olympics next month become oldest American female Olympics single skater since 1928 See else made US team',
 'The Emmywinning HBO teen drama Euphoria starring Zendaya come back second season Sunday But year since Season 1 ended Heres thing left',
 'Latest The Navy base Guantanamo Bay reached pandemic high 69 resident active confirmed positive cor']

In [49]:
sia = SentimentIntensityAnalyzer()

def objectivity_scores(tweet):

    textblob_objectivity = 1 - TB(tweet).sentiment.subjectivity
    vader_objectivity = sia.polarity_scores(tweet).get('neu')
    avg_objectivity = (textblob_objectivity + vader_objectivity) / 2

    return textblob_objectivity, vader_objectivity, avg_objectivity

In [51]:
o_scores = [objectivity_scores(tweet) for tweet in tweets[tweet_column]]
tb_scores, vdr_scores, avg_scores = [], [], []
for tb_score, vdr_score, avg_score in o_scores:
    tb_scores.append(tb_score)
    vdr_scores.append(vdr_score)
    avg_scores.append(avg_score)

In [57]:
tweets['TextBlob Objectivity Score'] = tb_scores
tweets['Vader Objectivity Score'] = vdr_scores
tweets['Average Objectivity Score'] = avg_scores
pd.DataFrame(o_scores)

Unnamed: 0,0,1,2
0,1.0,0.616,0.808
1,1.0,0.634,0.817
2,0.857143,0.909,0.883071
3,1.0,0.883,0.9415
4,0.235758,0.609,0.422379


In [56]:
tweets

Unnamed: 0,index,Tweets from nytimes,TextBlob Objectivity Score,Vader Objectivity Score,Average Objectivity Score
0,2022-01-08 23:00:13+00:00,Heres need know finding using rapid antigen te...,1.0,0.616,0.808
1,2022-01-08 22:40:02+00:00,Skiing endangered sport caught warming planet ...,1.0,0.634,0.817
2,2022-01-08 22:34:56+00:00,Mariah Bell 25 first US national figure skatin...,0.857143,0.909,0.883071
3,2022-01-08 22:25:02+00:00,The Emmywinning HBO teen drama Euphoria starri...,1.0,0.883,0.9415
4,2022-01-08 22:23:43+00:00,Latest The Navy base Guantanamo Bay reached pa...,0.235758,0.609,0.422379
