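In this notebook we use the Twitter API v2 (through the `tweepy` client) to fetch recent tweets matching a search query, look up the profiles of their authors, and save both tweets and users to CSV files.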

In [4]:
import tweepy
import pandas as pd
import time

def setup_twitter_api(bearer_token=None):
    """Set up a Twitter API v2 client.

    Args:
        bearer_token: App bearer token used to authenticate. When omitted
            (or empty), the token is read from the ``TWITTER_BEARER_TOKEN``
            environment variable so that no credential has to live in the
            notebook itself.

    Returns:
        A ``tweepy.Client`` constructed with the resolved bearer token.

    Raises:
        ValueError: If no token was passed and ``TWITTER_BEARER_TOKEN`` is
            unset or empty.
    """
    # Local import keeps this function self-contained inside the notebook cell.
    import os

    # SECURITY: never hardcode the token as a default argument. Any token that
    # was previously committed with this notebook must be treated as leaked
    # and revoked/regenerated in the Twitter developer portal.
    token = bearer_token or os.environ.get("TWITTER_BEARER_TOKEN")
    if not token:
        raise ValueError(
            "No Twitter bearer token provided: pass bearer_token=... or set "
            "the TWITTER_BEARER_TOKEN environment variable."
        )
    return tweepy.Client(bearer_token=token)

def search_tweets(client, query, max_results=100):
    """Run a recent-tweet search and return the matching tweets.

    Args:
        client: Object exposing ``search_recent_tweets`` (e.g. the client
            returned by ``setup_twitter_api``).
        query: Search query string forwarded to the API unchanged.
        max_results: Number of tweets to request; forwarded unchanged.

    Returns:
        The ``data`` attribute of the search response. The creation time,
        public metrics and author ID fields are requested for each tweet.
        NOTE(review): ``data`` is presumably ``None`` when nothing matches --
        callers should check for a falsy result.
    """
    request_options = dict(
        query=query,
        max_results=max_results,
        # Extra per-tweet fields needed later when building the DataFrame.
        tweet_fields=['created_at', 'public_metrics', 'author_id'],
    )
    response = client.search_recent_tweets(**request_options)
    return response.data

def get_user_details(client, user_ids):
    """Get details about users by their IDs.

    The users lookup endpoint accepts at most 100 IDs per request, so the
    IDs are sent in batches of 100 and the per-batch results are concatenated
    in request order.

    Args:
        client: Object exposing ``get_users`` (e.g. the client returned by
            ``setup_twitter_api``).
        user_ids: Iterable of user IDs, or a single comma-separated string
            of IDs. ``None`` or an empty collection results in no API call.

    Returns:
        A list with the ``data`` entries of every batch response, or ``None``
        when no user was returned (same falsy contract as a single lookup
        whose response carries no data).
    """
    batch_size = 100  # per-request ID limit of the users lookup endpoint

    if user_ids is None:
        id_list = []
    elif isinstance(user_ids, str):
        # A comma-separated string is also a valid way to pass IDs; split it
        # so it can be batched like any other collection.
        id_list = [part.strip() for part in user_ids.split(',') if part.strip()]
    else:
        id_list = list(user_ids)

    collected = []
    for start in range(0, len(id_list), batch_size):
        response = client.get_users(
            ids=id_list[start:start + batch_size],
            user_fields=['name', 'username', 'created_at', 'public_metrics']
        )
        # A batch may legitimately carry no data (e.g. none of its IDs resolve).
        if response.data:
            collected.extend(response.data)

    return collected or None

def tweets_to_dataframe(tweets):
    """Convert tweet data to a pandas DataFrame.

    Args:
        tweets: Iterable of tweet objects exposing ``id``, ``text``,
            ``created_at``, ``author_id`` and a ``public_metrics`` mapping
            with the four ``*_count`` keys. ``None`` (what a search with no
            matches yields) is treated as an empty collection.

    Returns:
        A DataFrame with one row per tweet and the columns ``tweet_id``,
        ``text``, ``created_at``, ``author_id``, ``retweet_count``,
        ``reply_count``, ``like_count``, ``quote_count`` -- in that order.
        The columns are present even when there are no tweets, so downstream
        column access does not fail on an empty result.

    Raises:
        KeyError: If a tweet's ``public_metrics`` lacks one of the counts.
    """
    metric_names = ['retweet_count', 'reply_count', 'like_count', 'quote_count']
    columns = ['tweet_id', 'text', 'created_at', 'author_id'] + metric_names

    if tweets is None:
        tweets = []

    records = []
    for tweet in tweets:
        metrics = tweet.public_metrics
        record = {
            'tweet_id': tweet.id,
            'text': tweet.text,
            'created_at': tweet.created_at,
            'author_id': tweet.author_id,
        }
        # Flatten the nested metrics mapping into one column per counter.
        for metric in metric_names:
            record[metric] = metrics[metric]
        records.append(record)

    # Passing `columns` pins the schema and its order, including for empty input.
    return pd.DataFrame(records, columns=columns)

# Example usage of the script
if __name__ == "__main__":
    # Example run: search recent tweets for a topic, look up their authors,
    # and persist both tables as CSV files next to the notebook.

    # Initialize API client with your bearer token
    client = setup_twitter_api()

    # Define search query
    search_query = "data science"  # You can change this to any topic you want
    max_results = 100  # Number of tweets to retrieve (max 100 per request)

    # Derive the output file names once so writes and log messages cannot drift apart.
    file_stem = search_query.replace(' ', '_')
    tweets_csv = f"{file_stem}_tweets.csv"
    users_csv = f"{file_stem}_users.csv"

    print(f"Searching for tweets about '{search_query}'...")

    # Search for tweets
    tweets = search_tweets(client, search_query, max_results)

    if not tweets:
        print("No tweets found for this query")
    else:
        print(f"Found {len(tweets)} tweets")

        # Extract unique author IDs from the tweets
        author_ids = list({tweet.author_id for tweet in tweets})
        print(f"These tweets were created by {len(author_ids)} unique authors")

        # Get user details for these authors
        users = get_user_details(client, author_ids)

        # Persist the tweets regardless of whether the user lookup succeeds,
        # so an already-fetched result is never thrown away.
        df_tweets = tweets_to_dataframe(tweets)
        df_tweets.to_csv(tweets_csv, index=False)

        if users:
            print(f"Retrieved details for {len(users)} users")

            # Create a small dataframe of user information
            df_users = pd.DataFrame([
                {
                    'user_id': user.id,
                    'name': user.name,
                    'username': user.username,
                    'followers_count': user.public_metrics['followers_count'],
                    'following_count': user.public_metrics['following_count'],
                    'tweet_count': user.public_metrics['tweet_count'],
                }
                for user in users
            ])
            df_users.to_csv(users_csv, index=False)

            print(f"Saved tweet data to '{tweets_csv}'")
            print(f"Saved user data to '{users_csv}'")

            # Display sample of the data
            print("\nSample of tweet data:")
            print(df_tweets.head(3))

            print("\nSample of user data:")
            print(df_users.head(3))
        else:
            print("Could not retrieve user details")
            print(f"Saved tweet data to '{tweets_csv}'")

Searching for tweets about 'data science'...
Found 100 tweets
These tweets were created by 89 unique authors
Retrieved details for 89 users
Saved tweet data to 'data_science_tweets.csv'
Saved user data to 'data_science_users.csv'

Sample of tweet data:
              tweet_id                                               text  \
0  1913171407833379034  RT @omoalhajaabiola: Master in Business Analyt...   
1  1913171220226277705  → Context. \n→ Full datasets. \n→ The studies ...   
2  1913171217328021684  I'm a scientist.  \nEvery time I see a viral h...   

                 created_at            author_id  retweet_count  reply_count  \
0 2025-04-18 10:03:03+00:00  1185728337873899520              6            0   
1 2025-04-18 10:02:18+00:00            282965354              0            1   
2 2025-04-18 10:02:17+00:00            282965354              0            1   

   like_count  quote_count  
0           0            0  
1           0            0  
2           0            0  



In [None]:
# Load the CSVs written by the fetch cell for the "data science" query.
# The file names must match what that cell saves
# ("<query with underscores>_tweets.csv" / "_users.csv"); the previous
# "data_science_twitter_*.csv" names are never produced by this notebook,
# so a fresh Restart & Run All would fail with FileNotFoundError.
df_tweets = pd.read_csv("data_science_tweets.csv")
df_users = pd.read_csv("data_science_users.csv")

print("Sample Tweets:")
print(df_tweets.head())

print("Sample Users:")
print(df_users.head())


Sample Tweets:
              tweet_id                                               text  \
0  1913171407833379034  RT @omoalhajaabiola: Master in Business Analyt...   
1  1913171220226277705  → Context. \n→ Full datasets. \n→ The studies ...   
2  1913171217328021684  I'm a scientist.  \nEvery time I see a viral h...   
3  1913171215172141355  @Aruu2578 Lol 😂 umm well hey data science guy ...   
4  1913171038612918426  RT @VigilantFox: NEW: RFK Jr. dismantles the m...   

                  created_at            author_id  retweet_count  reply_count  \
0  2025-04-18 10:03:03+00:00  1185728337873899520              6            0   
1  2025-04-18 10:02:18+00:00            282965354              0            1   
2  2025-04-18 10:02:17+00:00            282965354              0            1   
3  2025-04-18 10:02:17+00:00  1912298092046802944              0            0   
4  2025-04-18 10:01:35+00:00  1890964286228353024            722            0   

   like_count  quote_count  
0     