# Import Libraries & Create API Object

In [2]:
# Twitter Analysis
# Patrick Kelly
# patkelly92@gmail.com
# https://www.linkedin.com/in/patrick--kelly/
# importing libraries and adding Authentication Information

import tweepy
import pandas as pd
import json
import time
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
english_stops = stopwords.words('english')
english_stops.append('http')

consumer_key = "xxxxxxxxxx"
consumer_secret = "xxxxxxxxxx"
access_token = "xxxxxxxxxx"
access_token_secret = "xxxxxxxxxx"

# Creating the authentication object
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# Setting your access token and secret
auth.set_access_token(access_token, access_token_secret)
# Creating the API object while passing in auth information
api = tweepy.API(auth) 

# Tweet Extraction
### Extract JSON tweets and then store in Pandas DataFrame

In [12]:
# Using the API object to get tweets from timeline
# For the ID block in timeline, type in username or numeric twitter id

# Assign the keyword you'd like to query
query = 'your query here'

# public_tweets = api.user_timeline(id='pad_list', count=300)
public_tweets = api.search(q=query, lang='en', count=300)

#  Putting all individual json tweets in a list 
list_of_dicts = []

for tweet in public_tweets:
    list_of_dicts.append(tweet._json)

#  json.dumps writes python datastructure to a JSON string, to txt file
# we need JSON data in txt file to be able to work with it
with open('tweets.txt', 'w') as file:
        file.write(json.dumps(list_of_dicts, indent=4))

#  Open tweets.txt file with JSON string, flattens and extracts the
# the desired fields we want into list of dictionaries
my_list = []

with open('tweets.txt', encoding='utf-8') as json_file:
    all_data = json.load(json_file)
    for each_dictionary in all_data:     #each_dictionary is its own tweet
        tweet_id = each_dictionary['id_str']
        user_name = each_dictionary['user']['name']
        text = each_dictionary['text']
        follower_count = each_dictionary['user']['followers_count']
        favorite_count = each_dictionary['favorite_count']
        retweet_count = each_dictionary['retweet_count']
        created_at = each_dictionary['created_at']
        location = each_dictionary['user']['location']
        user_mentions = each_dictionary['entities']['user_mentions']
        coordinates = each_dictionary['coordinates']
        replied_to_tweet = each_dictionary['in_reply_to_status_id_str']
        
        my_list.append({'tweet_id': str(tweet_id),
                        'user_name': str(user_name),
                        'text': str(text),
                        'follower_count': int(follower_count),
                        'favorite_count': int(favorite_count),
                        'retweet_count': int(retweet_count),
                        'created_at': created_at,
                        'location': location,
                        'user_mentions': user_mentions,
                        'coordinates': coordinates,
                        'replied_to_tweet': replied_to_tweet})

df = pd.DataFrame(my_list, columns =
                 ['tweet_id', 'user_name', 'text', 'follower_count',
                  'favorite_count', 'retweet_count', 'created_at', 
                  'location', 'user_mentions', 'coordinates',
                  'replied_to_tweet'])

# Instantiate SentimentIntensityAnalyzer object
sid = SentimentIntensityAnalyzer()
df['sentiment_scores'] = df['text'].apply(sid.polarity_scores)

# Extract compound score and place in to new column
df['compound_scores'] = df['sentiment_scores'].apply(lambda x: x['compound'])

# Display DataFrame
df.head()

Unnamed: 0,tweet_id,user_name,text,follower_count,favorite_count,retweet_count,created_at,location,user_mentions,coordinates,replied_to_tweet,sentiment_scores,compound_scores
0,1340802739375366147,Robert Dominiak,@ABC This is most likely not a practical optio...,1,0,0,Sun Dec 20 23:34:18 +0000 2020,"LaGrange Park, Illinois","[{'screen_name': 'ABC', 'name': 'ABC News', 'i...",,1.3407969974445097e+18,"{'neg': 0.148, 'neu': 0.852, 'pos': 0.0, 'comp...",-0.4497
1,1340802667753435136,Tell,and manually loads into memory an embedded pay...,18,0,0,Sun Dec 20 23:34:01 +0000 2020,,[],,1.3408023360343532e+18,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0
2,1340802667485032450,Mataya,RT @ale_sp_brazil: Malwoverview offers techni...,133,0,5,Sun Dec 20 23:34:01 +0000 2020,,"[{'screen_name': 'ale_sp_brazil', 'name': 'Ale...",,,"{'neg': 0.0, 'neu': 0.844, 'pos': 0.156, 'comp...",0.4824
3,1340802467391549441,tanglewood lion,"iPhones vulnerable to hacking tool for months,...",1000,0,0,Sun Dec 20 23:33:13 +0000 2020,midlands,[],,,"{'neg': 0.137, 'neu': 0.863, 'pos': 0.0, 'comp...",-0.2263
4,1340802358998159361,Patrick Tomassetti,RT @YourAnonCentral: Warning: update your iOS ...,27,0,155,Sun Dec 20 23:32:47 +0000 2020,,"[{'screen_name': 'YourAnonCentral', 'name': 'a...",,,"{'neg': 0.118, 'neu': 0.882, 'pos': 0.0, 'comp...",-0.34


In [13]:
#  Calculate overall sentiment of the extracted tweets
df['compound_scores'].mean()

-0.11289900000000007

In [14]:
# Displaying tweets with a negative sentiment score
df[df['compound_scores'] < 0]

Unnamed: 0,tweet_id,user_name,text,follower_count,favorite_count,retweet_count,created_at,location,user_mentions,coordinates,replied_to_tweet,sentiment_scores,compound_scores
0,1340802739375366147,Robert Dominiak,@ABC This is most likely not a practical optio...,1,0,0,Sun Dec 20 23:34:18 +0000 2020,"LaGrange Park, Illinois","[{'screen_name': 'ABC', 'name': 'ABC News', 'i...",,1.3407969974445097e+18,"{'neg': 0.148, 'neu': 0.852, 'pos': 0.0, 'comp...",-0.4497
3,1340802467391549441,tanglewood lion,"iPhones vulnerable to hacking tool for months,...",1000,0,0,Sun Dec 20 23:33:13 +0000 2020,midlands,[],,,"{'neg': 0.137, 'neu': 0.863, 'pos': 0.0, 'comp...",-0.2263
4,1340802358998159361,Patrick Tomassetti,RT @YourAnonCentral: Warning: update your iOS ...,27,0,155,Sun Dec 20 23:32:47 +0000 2020,,"[{'screen_name': 'YourAnonCentral', 'name': 'a...",,,"{'neg': 0.118, 'neu': 0.882, 'pos': 0.0, 'comp...",-0.34
5,1340802303045988352,n4to,Many would say Stuxnet was the most devastatin...,2,0,0,Sun Dec 20 23:32:34 +0000 2020,,[],,,"{'neg': 0.43, 'neu': 0.57, 'pos': 0.0, 'compou...",-0.9193
7,1340802009537101825,Cyber Security News,"RT @keepnetlabs: In #spearphishing attacks, cr...",9476,0,3,Sun Dec 20 23:31:24 +0000 2020,,"[{'screen_name': 'keepnetlabs', 'name': 'Keepn...",,,"{'neg': 0.304, 'neu': 0.553, 'pos': 0.143, 'co...",-0.5423
8,1340802007783903232,Security Testing,"RT @keepnetlabs: In #spearphishing attacks, cr...",33768,0,3,Sun Dec 20 23:31:24 +0000 2020,"Hyderabad, India","[{'screen_name': 'keepnetlabs', 'name': 'Keepn...",,,"{'neg': 0.304, 'neu': 0.553, 'pos': 0.143, 'co...",-0.5423
10,1340801997092646913,Security Testing,RT @reach2ratan: 2021 Cybersecurity Prediction...,33768,0,7,Sun Dec 20 23:31:21 +0000 2020,"Hyderabad, India","[{'screen_name': 'reach2ratan', 'name': 'Ratan...",,,"{'neg': 0.178, 'neu': 0.822, 'pos': 0.0, 'comp...",-0.3818
11,1340801994315993089,Cyber Security News,RT @reach2ratan: 2021 Cybersecurity Prediction...,9476,0,7,Sun Dec 20 23:31:20 +0000 2020,,"[{'screen_name': 'reach2ratan', 'name': 'Ratan...",,,"{'neg': 0.178, 'neu': 0.822, 'pos': 0.0, 'comp...",-0.3818
12,1340801990692122632,Wolfhodl Ⓥ,RT @CryptoArquitect: @JimmyMcShill @notsofast ...,962,0,1,Sun Dec 20 23:31:20 +0000 2020,Blockchain,"[{'screen_name': 'CryptoArquitect', 'name': '฿...",,,"{'neg': 0.1, 'neu': 0.9, 'pos': 0.0, 'compound...",-0.2732
13,1340801975093518344,Security Testing,RT @ptracesecurity: Threat Roundup for Decembe...,33768,0,2,Sun Dec 20 23:31:16 +0000 2020,"Hyderabad, India","[{'screen_name': 'ptracesecurity', 'name': 'Pt...",,,"{'neg': 0.195, 'neu': 0.805, 'pos': 0.0, 'comp...",-0.5267


# Export to Excel

In [15]:
df.to_excel('tweets.xlsx')
%ls

Twitter Analysis Training.ipynb  tweet.xlsx
Twitter Analysis.ipynb           tweets.txt


# Follower Extraction Tool

In [22]:
# Extracts list of all follower IDs for a certain accont, 'screen_name'

# Account, of which you'd like to see followers of:
screen_name = "marklamond"

follower_ID_List = []
for page in tweepy.Cursor(api.followers_ids, screen_name=screen_name).pages():
    follower_ID_List.extend(page)
    print(len(follower_ID_List))
    time.sleep(30)

print("You've added {} follower IDs to the follower".format(len(follower_ID_List)))

41
You've added 41 follower IDs to the follower


In [23]:
# Here, we take our new list of followers' IDs and use them to query Twitter's API
# for more detailed information. We then store that info in a DataFrame.

my_list = []
y = 0
z = 99


for x in range(int(len(follower_ID_List)/100+1)):
#for x in range(2):
    # Takes list of follower_ids and finds that user. 100 at a time
    followers = api.lookup_users(user_ids=follower_ID_List[y:z])
    followers = list(followers)
    y += 100
    z += 100
    for user in followers:
        follower_id = user.id_str
        name = user.name
        screen_name = user.screen_name
        description = user.description
        location = user.location
        follower_count = user.followers_count
        friends_count = user.friends_count
        favorite_count = user.favourites_count
        statuses_count = user.statuses_count
        created_at = user.created_at        
        

        my_list.append({'follower_id': str(follower_id),
                        'name': str(name),
                        'screen_name': str(screen_name),
                        'description': str(description),
                        'location': str(location),
                        'follower_count': int(follower_count),
                        'friends_count': int(friends_count),
                        'favorite_count': int(favorite_count),
                        'statuses_count': int(statuses_count),
                        'created_at': created_at})
        

df = pd.DataFrame(my_list, columns =
                 ['follower_id', 'name', 'screen_name', 'description', 
                  'location', 'follower_count', 'friends_count', 'favorite_count',
                  'statuses_count', 'created_at'])


df1 = pd.DataFrame(columns =
                 ['follower_id', 'name', 'screen_name', 'description', 
                  'location', 'follower_count', 'friends_count', 'favorite_count',
                  'statuses_count', 'created_at'])

df1.set_index('follower_id')

df1 = df1.append(df)


sid = SentimentIntensityAnalyzer()
df1['sentiment_scores'] = df1['description'].apply(sid.polarity_scores)

# Extract compound score and place in to new column
df1['compound_scores'] = df1['sentiment_scores'].apply(lambda x: x['compound'])


print("Job Finished")

Job Finished
