# import packages

In [52]:
import tweepy
import yaml
import json
import sqlite3
from datetime import datetime
import pandas as pd

# Read twitter Authentication Keys

In [53]:
# yaml file reader funtion
def read_yaml(file_path):
    with open(file_path, "r") as f:
        return yaml.safe_load(f)

# yaml config file path
file_path = "twitter_api_key_config.yaml"
# read from config file
api_credential = read_yaml(file_path)

# Create Twitter Authentication

In [54]:
# API authentication
auth = tweepy.OAuthHandler(api_credential["api_key"], \
                           api_credential["api_secret_token"])
auth.set_access_token(api_credential["access_token"], \
                      api_credential["access_token_secret"])
api = tweepy.API(auth, wait_on_rate_limit=True)

# Create the Database and Required Tables

In [91]:
# establish a database connection
conn = sqlite3.connect('goodbot_table.db')
cur = conn.cursor()
create_tweet_info_table = """CREATE TABLE tweet_info(tweet_id BIGINT PRIMARY KEY, \
                                          user_id BIGINT, \
                                          tweet_lang TEXT, \
                                          tweet_time TEXT, \
                                          source TEXT, \
                                          tweet_text TEXT,\
                                          quote_count TEXT, \
                                          reply_count INT, \
                                          retweet_count INT,\
                                          tweet_favorite_count INT, \
                                          hashtags TEXT, \
                                          short_urls TEXT, \
                                          expanded_urls TEXT, \
                                          user_mentions TEXT, \
                                          id_str TEXT, \
                                          truncated INT, \
                                          in_reply_to_status_id BIGINT, \
                                          in_reply_to_status_id_str TEXT, \
                                          in_reply_to_user_id BIGINT, \
                                          in_reply_to_user_id_str TEXT, \
                                          in_reply_to_screen_name TEXT, \
                                          coordinates TEXT, \
                                          place TEXT
                                          );"""

create_user_info_table =  """CREATE TABLE user_info(user_id BIGINT PRIMARY KEY, \
                                          user_screen_name TEXT, \
                                          user_name TEXT, \
                                          user_language TEXT, \
                                          location TEXT, \
                                          profile_url TEXT, \
                                          description TEXT, \
                                          protected TEXT, \
                                          verified TEXT, \
                                          created_at TEXT, \
                                          friends_count BIGINT, \
                                          followers_count BIGINT,\
                                          favorites_count BIGINT, \
                                          statuses_count BIGINT, \
                                          id_str TEXT, \
                                          url TEXT);"""

cur.execute(create_tweet_info_table)
cur.execute(create_user_info_table)
conn.close()

# check if a table exists in the database

In [55]:
conn = sqlite3.connect('tbt_table.db')
cur = conn.cursor()

table_exist_query = ''' SELECT count(*) FROM sqlite_master WHERE type='table' AND name='tweet_info' '''
cur.execute(table_exist_query)
exist_result = cur.fetchone()

if exist_result[0]==1:
    print("tweet_info table exists.")
else:
    print("tweet_info table does not exist.")
conn.close()

tweet_info table exists.


# define a StreamListener Object

In [56]:
# override tweepy.StreamListener to add logic to on_status
class MyStreamListener(tweepy.StreamListener):
    def __init__(self, listen_time=60):
        super(MyStreamListener, self).__init__()
        self.counter = 0
        print("Initialized Tweepy StreamListener.")
        self.start_time = datetime.now()
        self.current_time = datetime.now()
        self.listen_time = listen_time
        self.unique_user_id_set = set([])
        # adding database connection code
        self.conn = sqlite3.connect('tbt_table.db')
        self.cur  = self.conn.cursor()
        
    def insert_data(self, data):
        tweet_object=json.loads(data) # convert "string-line" into json
        # check if json object has a key id. Otherwise continue to next.
        if 'id' in tweet_object.keys(): 
            
            ##### Stuff I changed ######
            
            # tweet object information
            tweet_id        = tweet_object['id']                                #14
            user_id         = tweet_object['user']['id']                        #
            tweet_lang      = tweet_object['lang']                              #
            tweet_time      = str(pd.to_datetime(tweet_object['created_at']))   # created_at duplicate
            source          = tweet_object['source']                            #
            tweet_text      = tweet_object['text']                              #

            # tweet numeric information
            quote_count = tweet_object['quote_count']                           #
            reply_count = tweet_object['reply_count']                           #
            retweet_count = tweet_object['retweet_count']                       #
            tweet_favorite_count = tweet_object['favorite_count']               #

            # meta-content information
            hashtags = [str(hashtag['text']) for hashtag in tweet_object['entities']['hashtags']]  #
            hashtags = ",".join(hashtags)
            short_urls = [str(url['url']) for url in tweet_object['entities']['urls']]             #
            short_urls = ",".join(short_urls)
            expanded_urls = []                                                                     #
            try:
                expanded_urls = [str(url['expanded_url']) for url in tweet_object['entities']['urls']]
            except:
                print('Error Message: No Expanded URL.')
            expanded_urls = ",".join(expanded_urls)

            # user interaction based informations    
            user_mentions = [str(user_mentions['id'])\
                        for user_mentions in tweet_object['entities']['user_mentions']]
            user_mentions = ",".join(user_mentions)                                                 #


            # New Parameters
            # used try/except to ensure that program does not fail 

            # id_str
            try:
                id_str = tweet_object['id_str'] 
            except:
                id_str = None

            # truncated 
            try:
                truncated = tweet_object['truncated'] 
            except:
                truncated = None

            # in_reply_to_status_id 
            try:
                in_reply_to_status_id = tweet_object['in_reply_to_status_id'] 
            except:
                in_reply_to_status_id = None

            # in_reply_to_status_id_str 
            try:
                in_reply_to_status_id_str = tweet_object['in_reply_to_status_id_str'] 
            except:
                in_reply_to_status_id_str = None

            # in_reply_to_user_id  
            try:
                in_reply_to_user_id  = tweet_object['in_reply_to_user_id'] 
            except:
                in_reply_to_user_id  = None

            # in_reply_to_user_id_str 
            try:
                in_reply_to_user_id_str = tweet_object['in_reply_to_user_id_str'] 
            except:
                in_reply_to_user_id_str = None
            
            # in_reply_to_screen_name 
            try:
                in_reply_to_screen_name = tweet_object['in_reply_to_screen_name'] 
            except:
                in_reply_to_screen_name = None

            # coordinates 
            try:
                coordinates = tweet_object['coordinates'] 
            except:
                coordinates = None

            # place 
            try:
                place = tweet_object['place'] 
            except:
                place = None


            tweet_info = (tweet_id, user_id, tweet_lang,\
                    tweet_time, source, tweet_text,\
                    quote_count, reply_count, retweet_count,\
                    tweet_favorite_count, hashtags, short_urls,\
                    expanded_urls, user_mentions, id_str, truncated,\
                    in_reply_to_status_id, in_reply_to_status_id_str,\
                    in_reply_to_user_id, in_reply_to_user_id_str,\
                    in_reply_to_screen_name, coordinates, place)
            self.cur.execute("INSERT INTO tweet_info \
            VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);", tweet_info)

            ########## End of my changes #############
            
            # user profile information

            if user_id in self.unique_user_id_set:
                pass
            else:
                self.unique_user_id_set.add(user_id)
                
                user_screen_name      = tweet_object['user']['screen_name']
                user_name             = tweet_object['user']['name']
                user_language         = tweet_object['user']['lang']  
                location              = tweet_object['user']['location']
                profile_url           = tweet_object['user']['url']
                description           = tweet_object['user']['description']
                protected             = tweet_object['user']['protected']
                verified              = tweet_object['user']['verified']
                created_at            = str(pd.to_datetime(tweet_object['user']['created_at']))
                friends_count         = tweet_object['user']['friends_count']
                followers_count       = tweet_object['user']['followers_count']
                favorites_count       = tweet_object['user']['favourites_count']
                statuses_count        = tweet_object['user']['statuses_count']
                
                try:
                    id_str = tweet_object['user']['id_str'] 
                except Exception as e:
                    print('Exception: id_str')
                    print(e)
                    id_str = None
                    
                try:
                    url = tweet_object['user']['url'] 
                except Exception as e:
                    print('Exception: url')
                    print(e)
                    url = None
                    
                    
                
                    
            

                user_information = (user_id, user_screen_name, user_name,\
                       user_language, location, profile_url,\
                       description, protected, verified, created_at,\
                       friends_count, followers_count,\
                       favorites_count, statuses_count, id_str, url)
                self.cur.execute(" INSERT INTO user_info VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);", user_information)
            
            self.conn.commit()
        
    def on_data(self, data):
        self.current_time = datetime.now()
        time_elapsed = (self.current_time - self.start_time).total_seconds()
        if time_elapsed < self.listen_time:
            try:
                self.counter += 1
                """
                Changing the code here.
                Previously, we saved to Text file.
                Now, we will pass this to the Database insertor method.
                """
                # -- self.output_file.write(str(data))
                print(f"Tweet Processed: {self.counter}\n")
                self.insert_data(str(data))
                
            except Exception as e:
                print(f"On data Exception:{e}.")
        else:
            print(f"Stream listen time period ended. Total listen time: {self.listen_time} seconds.\n\n")
            print(f"Total Tweet processed: {self.counter}")
            self.conn.close()
            return False

    # handling Errors
    def on_error(self, status_code):
        print(f"status_code: {status_code}")
        if status_code == 420:
            #returning False in on_error disconnects the stream
            return False

# create a stream

In [57]:
myStreamListener = MyStreamListener(listen_time=90000)
myStream = tweepy.Stream(api.auth, myStreamListener)

Initialized Tweepy StreamListener.


# start the streamer

In [58]:
keywords = ['#tbt']







try:
    print("Stream Filter")
    myStream.filter(track=keywords)
    print("DONE")
except Exception as e:
    print(f"error in stream filter {e}")

Stream Filter
Tweet Processed: 1

Tweet Processed: 2

Tweet Processed: 3

Tweet Processed: 4

Tweet Processed: 5

Tweet Processed: 6

Tweet Processed: 7

Tweet Processed: 8

Tweet Processed: 9

Tweet Processed: 10

Tweet Processed: 11

Tweet Processed: 12

Tweet Processed: 13

Tweet Processed: 14

Tweet Processed: 15

Tweet Processed: 16

Tweet Processed: 17

Tweet Processed: 18

Tweet Processed: 19

Tweet Processed: 20

Tweet Processed: 21

Tweet Processed: 22

Tweet Processed: 23

On data Exception:Error binding parameter 21 - probably unsupported type..
Tweet Processed: 24

On data Exception:UNIQUE constraint failed: user_info.user_id.
Tweet Processed: 25

On data Exception:Error binding parameter 22 - probably unsupported type..
Tweet Processed: 26

Tweet Processed: 27

Tweet Processed: 28

On data Exception:Error binding parameter 22 - probably unsupported type..
Tweet Processed: 29

On data Exception:Error binding parameter 22 - probably unsupported type..
Tweet Processed: 30

Tw

Tweet Processed: 268

Tweet Processed: 269

Tweet Processed: 270

Tweet Processed: 271

Tweet Processed: 272

Tweet Processed: 273

Tweet Processed: 274

On data Exception:UNIQUE constraint failed: user_info.user_id.
Tweet Processed: 275

Tweet Processed: 276

Tweet Processed: 277

Tweet Processed: 278

Tweet Processed: 279

Tweet Processed: 280

Tweet Processed: 281

Tweet Processed: 282

Tweet Processed: 283

On data Exception:UNIQUE constraint failed: user_info.user_id.
Tweet Processed: 284

Tweet Processed: 285

Tweet Processed: 286

Tweet Processed: 287

Tweet Processed: 288

Tweet Processed: 289

Tweet Processed: 290

Tweet Processed: 291

Tweet Processed: 292

Tweet Processed: 293

Tweet Processed: 294

Tweet Processed: 295

On data Exception:UNIQUE constraint failed: user_info.user_id.
Tweet Processed: 296

On data Exception:Error binding parameter 22 - probably unsupported type..
Tweet Processed: 297

Tweet Processed: 298

On data Exception:UNIQUE constraint failed: user_info.

(1438344497218326532, 192832526, 'en', '2021-09-16 03:30:25+00:00', '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'Its called PRIVATE LIFE for a reason #thursdaymorning #Tbt https://t.co/17uTnZCSbY', '0', 0, 0, 0, 'thursdaymorning,Tbt', '', '', '')



(192832526, 'sucrepie_', 'Judith', None, 'Hungary', None, 'Agriculture| Plant based food $ smoothies recipes | Relationships| Public Relations | God fearing | Model | Research| Realist | I love love and love to be loved', '0', '0', '2010-09-20 08:21:45+00:00', 156, 133, 5347, 2320)


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)






user_info_column_names: ['user_id', 'user_screen_name', 'user_name', 'user_language', 'location', 'profile_url', 'description', 'protected', 'verified', 'created_at', 'friends_count', 'followers_count', 'favorites_count', 'statuses_count']


# fetch the data into a pandas dataframe

In [19]:
conn = sqlite3.connect('goodbot_table.db')
cur = conn.cursor()

cur.execute("SELECT * FROM tweet_info")
tweet_info_all_result = cur.fetchall()
tweet_info_column_names = [description[0] for description in cur.description]
tweet_info_dataframe = pd.DataFrame(tweet_info_all_result, \
                                   columns=tweet_info_column_names)
print(f"No of tweet stored: {len(tweet_info_all_result)}, {tweet_info_dataframe.shape[0]}\n")



cur.execute("SELECT * FROM user_info")
user_info_all_result = cur.fetchall()
user_info_column_names = [description[0] for description in cur.description]
user_info_dataframe = pd.DataFrame(user_info_all_result, \
                                   columns=user_info_column_names)
print(f"No of user info stored: {len(user_info_all_result)}, {user_info_dataframe.shape[0]}\n")

conn.close()

No of tweet stored: 50237, 50237

No of user info stored: 3416, 3416



In [5]:
tweet_info_dataframe.head()

Unnamed: 0,tweet_id,user_id,tweet_lang,tweet_time,source,tweet_text,quote_count,reply_count,retweet_count,tweet_favorite_count,...,user_mentions,id_str,truncated,in_reply_to_status_id,in_reply_to_status_id_str,in_reply_to_user_id,in_reply_to_user_id_str,in_reply_to_screen_name,coordinates,place
0,1459610441450409984,551700157,in,2021-11-13 19:53:42+00:00,"<a href=""http://twittbot.net/"" rel=""nofollow"">...",Tipe ideal Nana adalah Lee Sun Gyun\nNana deka...,0,0,0,0,...,,1459610441450409984,0,,,,,,,
1,1459610446412259328,935681833,ja,2021-11-13 19:53:43+00:00,"<a href=""http://twittbot.net/"" rel=""nofollow"">...",生産量1位はグンマー #bot,0,0,0,0,...,,1459610446412259328,0,,,,,,,
2,1459610448920473600,2909597051,en,2021-11-13 19:53:43+00:00,"<a href=""http://twittbot.net/"" rel=""nofollow"">...",Suggestions are always welcome! #BOT,0,0,0,0,...,,1459610448920473600,0,,,,,,,
3,1459610461041983488,2880050479,ja,2021-11-13 19:53:46+00:00,"<a href=""http://twittbot.net/"" rel=""nofollow"">...",にゃーん！ #bot,0,0,0,0,...,,1459610461041983488,0,,,,,,,
4,1459610461792473093,1348512671486812160,vi,2021-11-13 19:53:46+00:00,"<a href=""https://mobile.twitter.com"" rel=""nofo...","RT @meopho270699: 😗😗😗 sáng sớm k có gì ăn, đượ...",0,0,0,0,...,1.1260172857877504e+18,1459610461792473093,0,,,,,,,


In [48]:
sample = user_info_dataframe.sample(n=5)
sample['user_screen_name'].values

array(['fabricsarts', 'P_KO_jintai', 'Khanh19872677', 'Swat33662057',
       'Aska_Eva02'], dtype=object)

# Add the bot rating column to the database table

In [18]:
conn = sqlite3.connect('goodbot_table.db')
cur = conn.cursor()
add_bot_rating = """ALTER TABLE user_info \
                    ADD bot_rating DECIMAL(1,2);"""

cur.execute(add_bot_rating)
conn.close()

# Get a sample of 5, 10, 15 users

In [63]:
sample = user_info_dataframe.sample(n=10)
sample['user_screen_name'].values

array(['PienaNashetania', 'inthreek', 'ocaesar0310', 'TnNguynNgc12',
       'a_kitsumaru', 'cephalopodluke2', 'ErzaOfFairyTail',
       'Farozipauundana', 'himeka_s', 'Long123475'], dtype=object)

# Update the bot rating for an array of screen names

In [64]:
conn = sqlite3.connect('goodbot_table.db')
cur = conn.cursor()

screen_names = ['PienaNashetania', 'inthreek', 'ocaesar0310', 'TnNguynNgc12',
       'a_kitsumaru', 'cephalopodluke2', 'ErzaOfFairyTail',
       'Farozipauundana', 'himeka_s', 'Long123475']
scores = [4.9,4.2,0.6,4.8,4.8,3.8,4.1,2.6,4.8,4.6]

for num in range(0,10):
    update_bot_rating = """UPDATE user_info \
                           SET bot_rating ='""" + str(scores[num]) + """' \
                           WHERE user_screen_name='""" + screen_names[num] + """';"""
    
    print(update_bot_rating)

    cur.execute(update_bot_rating)
    
conn.commit()
conn.close()

UPDATE user_info                            SET bot_rating ='4.9'                            WHERE user_screen_name='PienaNashetania';
UPDATE user_info                            SET bot_rating ='4.2'                            WHERE user_screen_name='inthreek';
UPDATE user_info                            SET bot_rating ='0.6'                            WHERE user_screen_name='ocaesar0310';
UPDATE user_info                            SET bot_rating ='4.8'                            WHERE user_screen_name='TnNguynNgc12';
UPDATE user_info                            SET bot_rating ='4.8'                            WHERE user_screen_name='a_kitsumaru';
UPDATE user_info                            SET bot_rating ='3.8'                            WHERE user_screen_name='cephalopodluke2';
UPDATE user_info                            SET bot_rating ='4.1'                            WHERE user_screen_name='ErzaOfFairyTail';
UPDATE user_info                            SET bot_rating ='2.6'        

# Verify that the bot rating was updated. Query only the users with more than 3.5.

In [66]:
conn = sqlite3.connect('goodbot_table.db')
cur = conn.cursor()

cur.execute("SELECT * FROM user_info WHERE bot_rating > 3.5")

#Check how many users in total
#cur.execute("SELECT * FROM user_info WHERE bot_rating IS NOT NULL")

user_info_all_result = cur.fetchall()
user_info_column_names = [description[0] for description in cur.description]
user_info_bot_dataframe = pd.DataFrame(user_info_all_result, \
                                   columns=user_info_column_names)
print(f"No of user info stored: {len(user_info_all_result)}, {user_info_dataframe.shape[0]}\n")

conn.close()

No of user info stored: 20, 3416



In [62]:
user_info_bot_dataframe.head(10)

Unnamed: 0,user_id,user_screen_name,user_name,user_language,location,profile_url,description,protected,verified,created_at,friends_count,followers_count,favorites_count,statuses_count,id_str,url,bot_rating
0,3243767972,xPirateCoveFox,Jeremy Fitzgerald,,The Office,http://Scottgames.com,A-all I wanted w-was quick money... #Single,0,0,2015-06-13 01:41:25+00:00,39,285,13,24326,3243767972,http://Scottgames.com,4.5
1,1187889491949670404,fabricsarts,fabricsarts_co,,東京都目黒区自由が丘,http://www.fabrics-arts.net,目黒区自由が丘のバッグメーカーです。自社縫製工場でオリジナル商品を小ロットから大量生産まで致...,0,0,2019-10-26 00:31:55+00:00,2,5,188,8711,1187889491949670404,http://www.fabrics-arts.net,4.6
2,618872117,Aska_Eva02,惣流・アスカ・ラングレー,,第３新東京市,http://twpf.jp/Aska_Eva02,アタシはエヴァンゲリオン弐号機パイロット惣流・アスカ・ラングレーよ！アタシは非公式なりきりだ...,0,0,2012-06-26 08:34:59+00:00,264,522,9,19173,618872117,http://twpf.jp/Aska_Eva02,4.9
3,1334120898,IbaraMayaka_AR,Ibara Mayaka,,"Kamiyama, Japan",http://hakuna-otaku-matata.tumblr.com,|| I'm Back From 2 Months of Hibernate. Mentio...,0,0,2013-04-07 13:39:23+00:00,37,76,1,14744,1334120898,http://hakuna-otaku-matata.tumblr.com,4.2
4,602086277,P_KO_jintai,P子,,,,自分は「人類は衰退しました」の世界よりきた…ぴ、ぴおんであります。隊長殿にP子と名付けていた...,0,0,2012-06-07 18:20:39+00:00,75,108,0,12796,602086277,,4.8
5,1265897937638309891,Swat33662057,Swat,,,,......,0,0,2020-05-28 06:50:56+00:00,411,13,1729,1595,1265897937638309891,,4.7
6,1222181880251863041,Khanh19872677,Khanh,,,,có thể sắp có trò mới,0,0,2020-01-28 15:37:37+00:00,77,3,19,321,1222181880251863041,,4.2
7,1416693810210349056,Ho50290170,Ơ Nhox,,,,Vui thôi đừng vui quá,0,0,2021-07-18 09:38:34+00:00,1784,60,2,274,1416693810210349056,,4.9
8,1452576377937403912,Mahmudu21176197,mahmudtpi97,,"Dinajpur, Rangpur, Bangladesh",https://mahmud.coderit.fun/,I am Front-end and WordPress Developer with 5+...,0,0,2021-10-25 10:03:21+00:00,68,11,8,58,1452576377937403912,https://mahmud.coderit.fun/,3.1
9,1461065138593677314,NicksTabieros,nicks tabieros,,,,Cryptic stupid,0,0,2021-11-17 20:14:31+00:00,19,0,0,3,1461065138593677314,,4.9


In [13]:
response = api.user_timeline(screen_name="Sungjae__95")
for elem in response:
    print(elem._json)

{'created_at': 'Wed Dec 01 00:53:19 +0000 2021', 'id': 1465846437213016066, 'id_str': '1465846437213016066', 'text': 'Tipe ideal Nana adalah Lee Sun Gyun\nNana dekat dengan Nicole KARA #Bot', 'truncated': False, 'entities': {'hashtags': [{'text': 'Bot', 'indices': [66, 70]}], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href="http://twittbot.net/" rel="nofollow">twittbot.net</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 551700157, 'id_str': '551700157', 'name': 'Yook Sungjae', 'screen_name': 'Sungjae__95', 'location': '', 'description': '#RPWithNoAgency | 95 lines', 'url': 'http://t.co/PsIYIunv', 'entities': {'url': {'urls': [{'url': 'http://t.co/PsIYIunv', 'expanded_url': 'http://google.com', 'display_url': 'google.com', 'indices': [0, 20]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 415, 'friends_count': 

# close the database connection

In [18]:
conn.close()