In [2]:
from textblob import TextBlob as TB, Word
from nltk.corpus import stopwords
import nltk
# import tensorflow_datasets as tfds
# import tensorflow_hub as tfhub
# from tensorflow import keras
# import tensorflow as tf
import pandas as pd
import tweepy as ty
import numpy as np
import re
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [3]:
twtr_info = pd.read_csv('keys_tokens.csv')
type(twtr_info)

pandas.core.frame.DataFrame

In [4]:
consumer_key = twtr_info['API Key'][0]
consumer_secret = twtr_info['API Key Secret'][0]
access_token = twtr_info['Access Token'][0]
access_token_secret = twtr_info['Access Token Secret'][0]

In [5]:
auth = ty.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = ty.API(auth, wait_on_rate_limit = True)

In [6]:
news_sources = ['nytimes', 'WSJ', 'FoxNews', 'CNN', 'BBC', 'AlJazeera']
twitter_handle = news_sources[0]

In [7]:
query = ty.Cursor(api.user_timeline, screen_name=twitter_handle, tweet_mode='extended', lang='en').items(5)

In [8]:
# organize tweets
tweet_text, date_posted = [], []
for tweet in query:
    tweet_text.append(tweet.full_text)
    date_posted.append(tweet.created_at)

Unexpected parameter: lang


In [9]:
# create dataframe using tweet_text & date_posted
tweets = pd.DataFrame()
tweet_column = 'Tweets from ' + twitter_handle
tweets.insert(loc=0, column=tweet_column, value=tweet_text)
tweets.index = date_posted
tweets.reset_index(inplace=True)
tweets

Unnamed: 0,index,Tweets from nytimes
0,2022-01-09 00:25:03+00:00,Jumping rope isn’t just for little kids and bo...
1,2022-01-08 23:45:05+00:00,"Max Julien, the sultry, soft-voiced actor and ..."
2,2022-01-08 23:25:06+00:00,"As “Search Party” begins its fifth season, Ali..."
3,2022-01-08 23:00:13+00:00,Here’s what you need to know from @wirecutter ...
4,2022-01-08 22:40:02+00:00,"Skiing is an endangered sport, caught between ..."


In [10]:
# get common words from nltk.corpus
nltk.download('vader_lexicon')
nltk.download('wordnet')
nltk.download('stopwords')
common_words = stopwords.words('english')  # list of common words

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/hamzarashid/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/hamzarashid/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/hamzarashid/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [11]:
def clean_tweet(text):
    tweet = text
    to_replace = ['@[\w]+', 'RT[\s]+', '#', '[^\s\w]', 'http[\w]+']

    # remove @ mentions, RTs, hashtags ...
    for character_sequence in to_replace:
        tweet = re.sub(character_sequence, '', tweet)

    # remove words that have no impact on sentiment measure
    tweet = ' '.join(word for word in tweet.split() if word not in common_words)

    # turn words into most basic form
    tweet = ' '.join(Word(word).lemmatize() for word in tweet.split())

    return tweet

In [12]:
tweets[tweet_column] = tweets[tweet_column].apply(clean_tweet)
tweets

Unnamed: 0,index,Tweets from nytimes
0,2022-01-09 00:25:03+00:00,Jumping rope isnt little kid boxing montage It...
1,2022-01-08 23:45:05+00:00,Max Julien sultry softvoiced actor screenwrite...
2,2022-01-08 23:25:06+00:00,As Search Party begin fifth season Alia Shawka...
3,2022-01-08 23:00:13+00:00,Heres need know finding using rapid antigen te...
4,2022-01-08 22:40:02+00:00,Skiing endangered sport caught warming planet ...


In [13]:
tweet_list = list(tweets[tweet_column])
tweet_list

['Jumping rope isnt little kid boxing montage Its also portable workout build strength speed protects body age',
 'Max Julien sultry softvoiced actor screenwriter rose popculture prominence starring role 1973 film The Mack died Jan 1 hospital Los Angeles He 88',
 'As Search Party begin fifth season Alia Shawkat embarking new challenge saying goodbye',
 'Heres need know finding using rapid antigen test Covid19 useful though foolproof way protect others infection',
 'Skiing endangered sport caught warming planet global pandemic But ski touring uphill skiing growing popularity Heres']

In [14]:
sia = SentimentIntensityAnalyzer()

def objectivity_scores(tweet):

    textblob_objectivity = 1 - TB(tweet).sentiment.subjectivity
    vader_objectivity = sia.polarity_scores(tweet).get('neu')
    avg_objectivity = (textblob_objectivity + vader_objectivity) / 2

    return textblob_objectivity, vader_objectivity, avg_objectivity

In [15]:
o_scores = [objectivity_scores(tweet) for tweet in tweets[tweet_column]]
tb_scores, vdr_scores, avg_scores = [], [], []
for tb_score, vdr_score, avg_score in o_scores:
    tb_scores.append(tb_score)
    vdr_scores.append(vdr_score)
    avg_scores.append(avg_score)

In [16]:
tweets['TextBlob Objectivity Score'] = tb_scores
tweets['Vader Objectivity Score'] = vdr_scores
tweets['Average Objectivity Score'] = avg_scores
pd.DataFrame(o_scores)

Unnamed: 0,0,1,2
0,0.5,0.732,0.616
1,0.05,0.854,0.452
2,0.545455,0.733,0.639227
3,1.0,0.616,0.808
4,1.0,0.634,0.817


In [17]:
tweets

Unnamed: 0,index,Tweets from nytimes,TextBlob Objectivity Score,Vader Objectivity Score,Average Objectivity Score
0,2022-01-09 00:25:03+00:00,Jumping rope isnt little kid boxing montage It...,0.5,0.732,0.616
1,2022-01-08 23:45:05+00:00,Max Julien sultry softvoiced actor screenwrite...,0.05,0.854,0.452
2,2022-01-08 23:25:06+00:00,As Search Party begin fifth season Alia Shawka...,0.545455,0.733,0.639227
3,2022-01-08 23:00:13+00:00,Heres need know finding using rapid antigen te...,1.0,0.616,0.808
4,2022-01-08 22:40:02+00:00,Skiing endangered sport caught warming planet ...,1.0,0.634,0.817


In [21]:
a_scs = list(tweets['Average Objectivity Score'])
np.mean(a_scs)

0.6664454545454547