# Libraries Import

In [1]:
# LIBRARIES IMPORT

# for reading credentials / connection settings from environment variables
import os

# for getting current date and time for CSV file creation
from datetime import datetime

# for working with dataframes, CSV
import pandas as pd

# for working with wrapping python codes into SQL queries
import sqlalchemy as db

# for working with twitter API
import tweepy

# Create Table in PostgreSQL

In [2]:
# Create connection engine
engine = db.create_engine('postgresql://postgres:admin@localhost:5432/tweet-crawl') 
#user postgres, password admin - change yours accordingly. Also check the port number
conn = engine.raw_connection()

# Create new tables in PostgreSQL
commands = ('''CREATE TABLE IF NOT EXISTS twitterDatabase (tweet_id BIGINT PRIMARY KEY,
                                                           username TEXT,
                                                           display_name TEXT,
                                                           location TEXT,
                                                           followers_count INTEGER,
                                                           following_count INTEGER,
                                                           tweet_text TEXT,
                                                           hashtags TEXT,
                                                           polarity NUMERIC(3,2),
                                                           subjectivity NUMERIC(3,2));''')

# Initialize connection to PostgreSQL
cur = conn.cursor()
table_count = 0

# Create cursor to execute SQL commands
cur.execute(commands)
table_count += 1

# Close communication with server
conn.commit()
cur.close()
conn.close()

print(str(table_count),"table(s) have been created in PostgreSQL.")

1 table(s) have been created in PostgreSQL.


# Credentials and Authorization

In [3]:
# Credentials obtained from twitter developer account to access API
# Update "XXXX" to your own credentials keys
consumer_key = "XXXX"
consumer_secret = "XXXX"
access_key = "XXXX"
access_secret = "XXXX"

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)

# Query by Hashtag

This section builds the search query with the Tweepy API.

The function below runs the query and saves the results to a CSV file using pandas.

It then inserts the same data directly into the PostgreSQL database.

In [4]:
# function to display data of each tweet
def printtweetdata(n, ith_tweet):
        print()
        print(f"====Tweet {n}====")
        print(f"Tweet ID:{ith_tweet[0]}")
        print(f"Userame:{ith_tweet[1]}")
        print(f"Display Name:{ith_tweet[2]}")
        print(f"Location:{ith_tweet[3]}")
        print(f"Follower Count:{ith_tweet[4]}")
        print(f"Following Count:{ith_tweet[5]}")
        print(f"Tweet Text:{ith_tweet[6]}")
        print(f"Hashtags Used:{ith_tweet[7]}")


# function to perform data extraction
def scrapeHash(words, numtweet):

        # Creating DataFrame using pandas
        tweetsDF = pd.DataFrame(columns=['tweet_id',
                                   'username',
                                   'display_name',
                                   'location',
                                   'followers',
                                   'following',
                                   'text',
                                   'hashtags'])

        # We are using .Cursor() to search through twitter for the required tweets.
        # The number of tweets can be restricted using .items(number of tweets)
        tweets = tweepy.Cursor(api.search_tweets,
                               words,
                               tweet_mode='extended').items(numtweet)


        # .Cursor() returns an iterable object.
        # Each item in the iterator has various attributes
        # that you can access to get information about each tweet
        list_tweets = [tweet for tweet in tweets]

        # Counter to maintain Tweet Count
        i = 1
        
        # we will iterate over each tweet in the list for extracting information about each tweet
        val = []
        for tweet in list_tweets:
            tweet_id = tweet.id
            username = tweet.user.screen_name
            display_name = tweet.user.name
            location = tweet.user.location
            followers = tweet.user.followers_count
            following = tweet.user.friends_count
            hashtags = tweet.entities['hashtags']
            
            # Retweets can be distinguished by a retweeted_status attribute,
            # in case it is an invalid reference, except block will be executed
            try:
                text = tweet.retweeted_status.full_text
            except AttributeError:
                text = tweet.full_text
            
            # extracting all hashtags in the tweet
            # because there may be multiple hashtags in a tweet
            hashtext = list()            
            for j in range(0, len(hashtags)):
                hashtext.append(hashtags[j]['text'])
                
            # Here we are appending all the extracted information in the DataFrame
            ith_tweet = [tweet_id, username,
                         display_name, location,
                         followers, following,
                         text, hashtext]
            tweetsDF.loc[len(tweetsDF)] = ith_tweet

            # Function call to print tweet data on screen
            printtweetdata(i, ith_tweet)
            i = i+1
            
            val.append(ith_tweet)
            

        # we will save our database as a CSV file.
        tweetsDF.to_csv('hashtag-{}-tweets_{}.csv'.format(words, datetime.now().strftime("%Y-%m-%d_%H%M%S")), sep=',', index=False)

        # SQL command to insert scrapped tweet data into PostgreSQL database
        sql = '''
        INSERT INTO twitterDatabase(tweet_id, username, display_name, location, followers_count, following_count,
        tweet_text, hashtags)
        VALUES(%s,%s,%s,%s,%s,%s,%s,%s
        ) ON CONFLICT(tweet_id) DO NOTHING
        '''
        
        # Create connection engine
        engine = db.create_engine('postgresql://postgres:admin@localhost:5433/tweet-crawl') 
        #user postgres, password admin - change yours accordingly. Also check the port number
        conn = engine.raw_connection()
        
        # Initialize connection to PostgreSQL
        cur = conn.cursor()
        
        # Create cursor to execute SQL commands
        cur.executemany(sql, val)

        # Close communication with server
        conn.commit()
        cur.close()
        conn.close()

In [5]:
# Enter Hashtag
print("Enter Twitter HashTag to search for.")

# To make sure user enters only valid inputs; must be alphanumeric.
while True:
    try:
        words = input()
        # If input is empty
        if not words:
            raise ValueError("You did not enter anything. Please try again.")
        # If input does not contain only alphanumeric.
        elif not words.isalnum():
            raise ValueError("Only alphanumeric allowed. Please try again.")
        break
    except ValueError as e: # to catch any other errors
        print(e)
        
numtweet = 100

print('\nLoading, please wait......')

scrapeHash(words, numtweet)

print('\nScraping has completed!')

Enter Twitter HashTag to search for.
taylorswift

Loading, please wait......

====Tweet 1====
Tweet ID:1506981702425624582
Userame:longhairIarry
Display Name:Mymi 🏠 the chaotic mom
Location:No stunt Larrie┊19┊🏳‍🌈
Follower Count:516
Following Count:482
Tweet Text:@L_TaylorSwift_S @sunflowhrry Eh mes mentions
Hashtags Used:[]

====Tweet 2====
Tweet ID:1506981623799160834
Userame:L_TaylorSwift_S
Display Name:Dee 🦋 TPWK²⁸ (Taylor's Version) 🧣
Location:she/her
Follower Count:202
Following Count:402
Tweet Text:@sunflowhrry @longhairIarry C'est possible tu me fous le doute ptdr
Hashtags Used:[]

====Tweet 3====
Tweet ID:1506981514659209222
Userame:sunflowhrry
Display Name:Zozo 🏠 TRACK 13 IS MINE
Location:Princess Park 📌
Follower Count:180
Following Count:250
Tweet Text:@L_TaylorSwift_S @longhairIarry je suis sur y’avait une musique entre les deux hier
Hashtags Used:[]

====Tweet 4====
Tweet ID:1506981422938136578
Userame:L_TaylorSwift_S
Display Name:Dee 🦋 TPWK²⁸ (Taylor's Version) 🧣
Location:


====Tweet 31====
Tweet ID:1506975881763323904
Userame:SISTER78769579
Display Name:SISTER
Location:
Follower Count:62
Following Count:43
Tweet Text:TZUYU MELODY PROJECT “ME! (Taylor Swift)” Cover by TZUYU (Feat. Bang Chan of Stray Kids)
*views 24,000,866 👍1,630,000
*https://t.co/9Rp0KYLQYx…

#쯔위 #ツウィ #TZUYU #จื่อวี #子瑜
#TZUYU_Melody_Project
#TaylorSwift https://t.co/utgtW1EagA
Hashtags Used:[]

====Tweet 32====
Tweet ID:1506975663378497538
Userame:techguypaul
Display Name:Paul
Location:California, USA
Follower Count:374
Following Count:358
Tweet Text:You Are In Love is a criminally underrated song #taylorswift
Hashtags Used:['taylorswift']

====Tweet 33====
Tweet ID:1506975249241321478
Userame:nsayingus
Display Name:so much of summer love
Location:
Follower Count:7
Following Count:97
Tweet Text:IS THERE ANYONE GOING TO THE TAYLOR SWIFT CANDLELIGHT IN MELBOURNE 😭😭😭??? #taylorswift #candlelight #melbourne
Hashtags Used:['taylorswift', 'candlelight', 'melbourne']

====Tweet 34====
Tweet I

====Tweet 80====
Tweet ID:1506953786967085061
Userame:SawanttSaili
Display Name:Saili Sawantt - Architectural Writer & Marketer
Location:Mumbai, India
Follower Count:61
Following Count:144
Tweet Text:Best writing motivation song? Taylor Swift
Best anger management songs? Taylor Swift
Best hard break songs? Taylor Swift
Tell me one thing this woman can’t do ❤️💯🙌🏻 #TaylorSwift #TaylorsVersion #TaylorSwiftIsOverParty #music
Hashtags Used:['TaylorSwift', 'TaylorsVersion', 'TaylorSwiftIsOverParty', 'music']

====Tweet 81====
Tweet ID:1506953564870135809
Userame:shawmilaportal
Display Name:Portal Shawmila Brasil
Location:portalshawmilabra@gmail.com
Follower Count:5927
Following Count:35
Tweet Text:#洋楽ニュース
#taylorswift　新曲一部解禁
#JustinBieber 　11月13日大阪追加公演発表
#HarryStyles 　5月20日ニュー・アルバム発売
#ArianaGrande 　@rembeauty より新商品発表
#ShawnMendes  3月31日新曲配信
#SofiaCarson 3月25日 デビューアルバム配信
#CamilaCabello 4月8日 新作FAMILIA.発売 https://t.co/e2jOeiyKAX
Hashtags Used:['洋楽ニュース', 'taylorswift', 'JustinBieber', 'HarryStyl


Scraping has completed!


# Query by Username

This section builds the user-timeline query with the Tweepy API.

The function below runs the query and saves the results to a CSV file using pandas.

It then inserts the same data directly into the PostgreSQL database.

In [8]:
# function to display data of each tweet
def printtweetdata(n, ith_tweet):
        print()
        print(f"====Tweet {n}====")
        print(f"Tweet ID:{ith_tweet[0]}")
        print(f"Userame:{ith_tweet[1]}")
        print(f"Display Name:{ith_tweet[2]}")
        print(f"Location:{ith_tweet[3]}")
        print(f"Follower Count:{ith_tweet[4]}")
        print(f"Following Count:{ith_tweet[5]}")
        print(f"Tweet Text:{ith_tweet[6]}")
        print(f"Hashtags Used:{ith_tweet[7]}")


# function to perform data extraction
def scrapeUser(username, numtweet):

        # Creating DataFrame using pandas
        tweetsDF = pd.DataFrame(columns=['tweet_id',
                                   'username',
                                   'display_name',
                                   'location',
                                   'followers',
                                   'following',
                                   'text',
                                   'hashtags'])

        # We are using .Cursor() to search through twitter for the required tweets.
        # The number of tweets can be restricted using .items(number of tweets)
        tweets = tweepy.Cursor(api.user_timeline,
                               screen_name=username,
                               tweet_mode='extended').items(numtweet)


        # .Cursor() returns an iterable object.
        # Each item in the iterator has various attributes
        # that you can access to get information about each tweet
        list_tweets = [tweet for tweet in tweets]

        # Counter to maintain Tweet Count
        i = 1
        
        # we will iterate over each tweet in the list for extracting information about each tweet
        val = []
        for tweet in list_tweets:
            tweet_id = tweet.id
            username = tweet.user.screen_name
            display_name = tweet.user.name
            location = tweet.user.location
            followers = tweet.user.followers_count
            following = tweet.user.friends_count
            hashtags = tweet.entities['hashtags']
            
            # Retweets can be distinguished by a retweeted_status attribute,
            # in case it is an invalid reference, except block will be executed
            try:
                text = tweet.retweeted_status.full_text
            except AttributeError:
                text = tweet.full_text
            
            # extracting all hashtags in the tweet
            # because there may be multiple hashtags in a tweet
            hashtext = list()            
            for j in range(0, len(hashtags)):
                hashtext.append(hashtags[j]['text'])
                
            # Here we are appending all the extracted information in the DataFrame
            ith_tweet = [tweet_id, username,
                         display_name, location,
                         followers, following,
                         text, hashtext]
            tweetsDF.loc[len(tweetsDF)] = ith_tweet

            # Function call to print tweet data on screen
            printtweetdata(i, ith_tweet)
            i = i+1
            
            val.append(ith_tweet)

        # we will save our database as a CSV file.
        tweetsDF.to_csv('{}-tweets_{}.csv'.format(username, datetime.now().strftime("%Y-%m-%d_%H%M%S")), sep=',', index=False)

        # SQL command to insert scrapped tweet data into PostgreSQL database
        sql = '''
        INSERT INTO twitterDatabase(tweet_id, username, display_name, location, followers_count, following_count,
        tweet_text, hashtags)
        VALUES(%s,%s,%s,%s,%s,%s,%s,%s
        ) ON CONFLICT(tweet_id) DO NOTHING
        '''
        
        # Create connection engine
        engine = db.create_engine('postgresql://postgres:admin@localhost:5433/tweet-crawl') 
        #user postgres, password admin - change yours accordingly. Also check the port number
        conn = engine.raw_connection()
        
        # Initialize connection to PostgreSQL
        cur = conn.cursor()
        
        # Create cursor to execute SQL commands
        cur.executemany(sql, val)

        # Close communication with server
        conn.commit()
        cur.close()
        conn.close()

In [9]:
# Enter Username
print("Enter Twitter Username to search for.")

# To make sure user enters only valid inputs; must be alphanumeric.
while True:
    try:
        words = input()
        # If input is empty
        if not words:
            raise ValueError("You did not enter anything. Please try again.")
        # If input does not contain only alphanumeric.
        elif not words.isalnum():
            raise ValueError("Only alphanumeric allowed. Please try again.")
        break
    except ValueError as e: # to catch any other errors.
        print(e)
        
numtweet = 100

print('\nLoading, please wait......')

scrapeUser(words, numtweet)

print('\nScraping has completed!')

Enter Twitter Username to search for.
taylorswift13

Loading, please wait......

====Tweet 1====
Tweet ID:1485714267269738498
Userame:taylorswift13
Display Name:Taylor Swift
Location:
Follower Count:90404974
Following Count:0
Tweet Text:PS I wrote this tweet all by myself in case you were wondering 😑
Hashtags Used:[]

====Tweet 2====
Tweet ID:1485714265675812866
Userame:taylorswift13
Display Name:Taylor Swift
Location:
Follower Count:90404974
Following Count:0
Tweet Text:@DamonAlbarn I was such a big fan of yours until I saw this. I write ALL of my own songs. Your hot take is completely false and SO damaging. You don’t have to like my songs but it’s really fucked up to try and discredit my writing. WOW. https://t.co/t6GyXBU2Jd
Hashtags Used:[]

====Tweet 3====
Tweet ID:1470506710901084162
Userame:taylorswift13
Display Name:Taylor Swift
Location:
Follower Count:90404974
Following Count:0
Tweet Text:*don’t say it, don’t say it OKAY I’m saying it:* I’M FEELIN 32. And Alana is feeling 30. 


====Tweet 27====
Tweet ID:1443619878062235650
Userame:taylorswift13
Display Name:Taylor Swift
Location:
Follower Count:90404974
Following Count:0
Tweet Text:Got some news that I think you’re gonna like - My version of Red will be out a week earlier than scheduled (including the 4 disc vinyl) on November 12th ! Can’t wait to celebrate the 13th with you and our new/old autumn heartbreak album🧣😍 🎥 🎥

https://t.co/FVp6xizaOG
Hashtags Used:[]

====Tweet 28====
Tweet ID:1440046696394682375
Userame:taylorswift13
Display Name:Taylor Swift
Location:
Follower Count:90404974
Following Count:0
Tweet Text:It’s true, I signed them all and it’s also true that I may never write the same again, as my hand is now frozen in the permanent shape of a claw. All for you 😘 https://t.co/mnSkHyNCy6
Hashtags Used:[]

====Tweet 29====
Tweet ID:1438854975485059077
Userame:taylorswift13
Display Name:Taylor Swift
Location:
Follower Count:90404974
Following Count:0
Tweet Text:Hi! Saw you guys got Wildest Dreams tren

====Tweet 75====
Tweet ID:1398248516410130433
Userame:taylorswift13
Display Name:Taylor Swift
Location:
Follower Count:90404974
Following Count:0
Tweet Text:*Clover blooms in the fields/spring breaks loose/the time is near...* EVERMORE ALBUM VINYL IS OUT TODAY!! You can get it at your fav indie record store, Target, Walmart &amp; Amazon ✨ and if you’re feeling even more generous, go ahead and stream it too! That would be cool! https://t.co/Xd1LWNAjB5
Hashtags Used:[]

====Tweet 76====
Tweet ID:1392577233546530816
Userame:taylorswift13
Display Name:Taylor Swift
Location:
Follower Count:90404974
Following Count:0
Tweet Text:I got a list of names and yours is in... glittery gel pen with hearts drawn around it 😇 @Maisie_Williams @wiffygriffy @Olivia_Rodrigo https://t.co/EgT50dBa7n
Hashtags Used:[]

====Tweet 77====
Tweet ID:1392258056080302090
Userame:taylorswift13
Display Name:Taylor Swift
Location:
Follower Count:90404974
Following Count:0
Tweet Text:We meet up every Tuesday night for di


Scraping has completed!
