## Packages to Import

In [6]:
import snscrape.modules.twitter as sntwitter
import datetime as dt
import pandas as pd
import warnings
from datetime import datetime
warnings.filterwarnings('ignore')

## A function to scrape tweets in Python

![img](snscrape_tweet_attribute.png)

In [2]:
def scrape_hashtag_tweets(hashtag, start_date, end_date):
    """
    Scrape the tweets that contain a hashtag within a date range, matching the
    hashtag case-insensitively.

    Args:
        hashtag (str): the hashtag to scrape. A leading "#" (and surrounding
            whitespace) is optional and is stripped before the query is built,
            so "grateful" and "#grateful" are equivalent.
        start_date (str): the start date in "YYYY-MM-DD" format; passed to the
            search's ``since:`` operator.
        end_date (str): the end date in "YYYY-MM-DD" format; passed to the
            search's ``until:`` operator.
    Returns:
        List of dictionaries, one per tweet, each holding these keys copied
        from the scraped tweet object:
            - id: tweet.id
            - content: tweet.content (the text that is matched against the hashtag)
            - date: tweet.date
            - username: tweet.user.username
            - userdisplayname: tweet.user.displayname
            - userlocation: tweet.user.location
            - retweetCount: tweet.retweetCount
            - likeCount: tweet.likeCount
            - language: tweet.lang
            - source: tweet.source
            - retweetedTweet: tweet.retweetedTweet
            - quotedTweet: tweet.quotedTweet
            - mentionedUsers: tweet.mentionedUsers
    Raises:
        ValueError: if the hashtag is empty once "#" and whitespace are removed,
            or if either date is not in "YYYY-MM-DD" format.
    """
    # Accept both "grateful" and "#grateful"; otherwise the query below would
    # start with "##" when the caller includes the symbol.
    tag = hashtag.strip().lstrip("#")
    if not tag:
        raise ValueError("hashtag must contain at least one character besides '#'")

    # Parsed only to validate the format: strptime raises ValueError on bad
    # input, before any scraping starts.
    dt.datetime.strptime(start_date, "%Y-%m-%d")
    dt.datetime.strptime(end_date, "%Y-%m-%d")

    query = f"#{tag} since:{start_date} until:{end_date}"
    needle = tag.lower()

    tweets = []
    for tweet in sntwitter.TwitterSearchScraper(query).get_items():
        # Skip results whose text does not contain the hashtag (case-insensitive)
        if needle not in tweet.content.lower():
            continue

        tweets.append({
            "id": tweet.id,
            "content": tweet.content,
            "date": tweet.date,
            "username": tweet.user.username,
            "userdisplayname": tweet.user.displayname,
            "userlocation": tweet.user.location,
            "retweetCount": tweet.retweetCount,
            "likeCount": tweet.likeCount,
            "language": tweet.lang,
            "source": tweet.source,
            "retweetedTweet": tweet.retweetedTweet,
            "quotedTweet": tweet.quotedTweet,
            "mentionedUsers": tweet.mentionedUsers
        })

    return tweets


### Using the Function to get tweets

In [7]:
# Hashtag to scrape. Whitespace and a leading "#" are removed here because
# scrape_hashtag_tweets adds the "#" itself when it builds the search query.
hashtag = input("Enter the hashtag: ").strip().lstrip("#")

# Date on which scraping should start
start_date = input("Enter the start date in this format yyyy-mm-dd: ").strip()

# Date on which scraping should end
end_date = input("Enter the end date in this format yyyy-mm-dd: ").strip()

# Scrape tweets and convert to DataFrame
scraped_tweets = scrape_hashtag_tweets(hashtag, start_date, end_date)
tweets = pd.DataFrame(scraped_tweets)

# Show the first rows (a bare last expression gives the notebook's rich table display)
tweets.head()

Enter the hashtag: #grateful
Enter the start date in this format yyyy-mm-dd: 2023-03-01
Enter the start date in this format yyyy-mm-dd: 2023-03-05
                    id                                            content  \
0  1632166647518601218  A sunny day with lots of yellow blooms!  Thank...   
1  1632166578664947713  #SelfCareSaturday - cheering for everyone! 🙌🏼🫱...   
2  1632166428697825280                #LoveOnTourSydneyN2 #tpwk #grateful   
3  1632164554556973057  In calmness lies true pleasure.\n\n#quoteofthe...   
4  1632164213329129472  My favourite photo from last night. Pure joy. ...   

                       date         username          userdisplayname  \
0 2023-03-04 23:50:27+00:00    JaneForZville       JaneForZvilleMayor   
1 2023-03-04 23:50:11+00:00  TiffanyNeal_PhD             Tiffany Neal   
2 2023-03-04 23:49:35+00:00  KatieJa91528547                      Kat   
3 2023-03-04 23:42:08+00:00   tashisherpa007                       TS   
4 2023-03-04 23:40:47+00:

In [5]:
# Number of tweets that were scraped (row count of the DataFrame)
tweets.shape[0]

77

In [None]:
# Newest tweets first; returns a sorted copy and leaves `tweets` untouched
tweets.sort_values('date', ascending=False)

## Scraping a certain number of tweets based on a hashtag within a timeframe

In [None]:
# Hashtag to scrape, including the leading "#"
hashtag = '#basic'

# Maximum number of tweets to collect
max_tweets = 500

# Rows of [date, id, content, username], one per scraped tweet
tweets_list2 = []

# The f-prefix is required so that {hashtag} is replaced by its value;
# without it the literal text "{hashtag}" would be sent as the search term.
# Tweets are searched from 2020-06-01 till 2020-07-31.
search_query = f'{hashtag} since:2020-06-01 until:2020-07-31'

for i, tweet in enumerate(sntwitter.TwitterSearchScraper(search_query).get_items()):
    # i is zero-based, so stopping at i == max_tweets keeps exactly max_tweets rows
    if i >= max_tweets:
        break
    tweets_list2.append([tweet.date, tweet.id, tweet.content, tweet.user.username])

# Creating a dataframe from the tweets list above
tweets_df2 = pd.DataFrame(tweets_list2, columns=['Datetime', 'Tweet Id', 'Text', 'Username'])

tweets_df2.head()

## For more details

For more details on Twitter scraping with snscrape, see this [article](https://betterprogramming.pub/how-to-scrape-tweets-with-snscrape-90124ed006af).