# Twitter Scraping script

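Running this notebook queries the Twitter API for recent tweets and collects their content, saves each run as a CSV file, and finally merges all saved CSV files into a single dataset.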

`Don't forget to install the required libraries (tweepy, pandas) before running!`

In [1]:
import tweepy
import pandas as pd
from configparser import ConfigParser
import csv

# API Config
# Credentials are read from ../config.ini, section [keys].
# interpolation=None makes ConfigParser return values verbatim, so a '%'
# inside a token is not treated as interpolation syntax.
config = ConfigParser(interpolation=None)
config.read('../config.ini')
BEARER_TOKEN = config['keys']['BEARER_TOKEN']
# The four credentials below are read but not used by the client created in this cell.
consumer_key = config['keys']['CONSUMER_KEY']
consumer_secret = config['keys']['CONSUMER_SECRET']
access_token = config['keys']['ACCESS_TOKEN']
access_token_secret = config['keys']['ACCESS_TOKEN_SECRET']

# wait_on_rate_limit=True lets tweepy sleep and retry instead of failing when
# the rate limit is hit (this is what prints "Rate limit exceeded. Sleeping ...").
client = tweepy.Client(
    bearer_token=BEARER_TOKEN,
    wait_on_rate_limit=True
)

# Define search parameters
# Either phrase; retweets, replies and tweets containing links are excluded; English only.
search_query = '("climate emergency" OR "carbon emissions") -is:retweet -is:reply -has:links lang:en'
no_of_tweets = 100  # passed as max_results to the search call below

# Column layout of the resulting frame. Defining the (empty) frame up front
# guarantees `tweets_df` exists with the expected columns even when the request
# fails, so the display below never raises NameError or shows a stale frame
# left over from a previous run of this cell.
TWEET_COLUMNS = ["User", "Username", "Date Created", "Likes", "Tweet"]
tweets_df = pd.DataFrame(columns=TWEET_COLUMNS)

try:
    # Fetch
    tweets = client.search_recent_tweets(
        query=search_query,
        max_results=no_of_tweets,
        tweet_fields=['created_at', 'public_metrics', 'text'],
        user_fields=['name', 'username'],
        expansions=['author_id']
    )

    # Extract users data, keyed by user id so each tweet can be matched to its author.
    # When the search returns nothing there is no 'users' entry, hence .get().
    users = {u.id: u for u in tweets.includes.get('users', [])}

    # Prepare tweet data (tweets.data is None when nothing matched the query)
    tweets_data = []
    for tweet in tweets.data or []:
        # An author can be absent from the expansion; keep the tweet and
        # leave the author fields empty rather than aborting the whole run.
        user = users.get(tweet.author_id)
        tweets_data.append({
            "User": user.name if user is not None else None,
            "Username": user.username if user is not None else None,
            "Date Created": tweet.created_at,
            "Likes": tweet.public_metrics['like_count'],
            "Tweet": tweet.text
        })

    # Create DataFrame (explicit columns keep the layout stable for zero rows)
    tweets_df = pd.DataFrame(tweets_data, columns=TWEET_COLUMNS)

except tweepy.TweepyException as e:
    print(f"API Error: {e}")
except Exception as e:
    print(f"General Error: {e}")

tweets_df

Rate limit exceeded. Sleeping for 237 seconds.


Unnamed: 0,User,Username,Date Created,Likes,Tweet
0,El jefe,SoylaJefe,2025-06-29 13:02:19+00:00,0,Riddle me this climate clowns... why in most c...
1,Anvesha,p_pezzonovante,2025-06-29 12:52:08+00:00,1,The world is in such a bad place that climate ...
2,Gemma Elliott,drgemmaelliott,2025-06-29 12:36:04+00:00,0,Fresh climate emergency hellscape is that (all...
3,Orville Bartuska,orville67416,2025-06-29 12:24:08+00:00,0,Our digital habits leave a big environmental m...
4,Đoàn Tần,doantan137,2025-06-29 11:53:09+00:00,0,Renewable energy refers to energy derived from...
...,...,...,...,...,...
95,Vins,Vinitaa890,2025-06-27 03:56:06+00:00,0,From reducing carbon emissions to increasing c...
96,Mortimer,mortimer_1,2025-06-27 03:43:36+00:00,679,The Canadian and BC gov’t lecture Canadians ab...
97,Alana Shepherd,shepherd_a51338,2025-06-27 02:05:06+00:00,1,The burning police car pollutes the environmen...
98,Carl Martin,CarlMartin99589,2025-06-27 02:01:50+00:00,0,The international agreement aims to reduce car...


In [2]:
# Persist this run's tweets to their own CSV so several runs can be merged later.
import os

# Single source of truth for the output location: the path used for writing
# and the name shown in the confirmation message can no longer drift apart.
output_csv = '../data/scraped/tweet_content6.csv'

# to_csv does not create missing folders; make sure the target directory exists.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

# QUOTE_ALL wraps every field in quotes, so commas and line breaks inside
# tweet text cannot break the CSV structure.
tweets_df.to_csv(output_csv, index=False, quoting=csv.QUOTE_ALL, encoding='utf-8')
print(f"Saved {len(tweets_df)} tweets to {os.path.basename(output_csv)}")

Saved 100 tweets to tweet_content6.csv


### Merge all scraped data

In [3]:
import pandas as pd
import glob
import os

# 1. Directory holding the per-run scrape CSVs, and how many files we expect there
input_dir = "../data/scraped"
EXPECTED_FILE_COUNT = 7

# 2. Get all CSV files in the directory.
#    Sorted because glob makes no ordering promise; this keeps the row order
#    of the merged file identical from run to run.
all_csv_files = sorted(glob.glob(os.path.join(input_dir, "*.csv")))

# 3. Sanity check on the number of inputs (informational only, never aborts)
if len(all_csv_files) != EXPECTED_FILE_COUNT:
    print(f"Found {len(all_csv_files)} files instead of {EXPECTED_FILE_COUNT}. Continuing anyway...")

# 4. One DataFrame per successfully read file
dfs = []

# 5. Read each CSV file and append to the list.
#    A file that cannot be read is reported and skipped so that one bad file
#    does not prevent the others from being merged.
for csv_file in all_csv_files:
    try:
        df = pd.read_csv(csv_file)
        # Record which file each row came from
        df['source_file'] = os.path.basename(csv_file)
        dfs.append(df)
        print(f"Processed: {csv_file} | Rows: {len(df)}")
    except Exception as e:
        print(f"Error reading {csv_file}: {e}")

# 6./7. Concatenate and save.
#    The output lives one level above input_dir, so re-running this cell does
#    not pick the merged file up as an input.
output_path = os.path.join("../data", "tweets.csv")

if dfs:
    merged_df = pd.concat(dfs, ignore_index=True)
    merged_df.to_csv(output_path, index=False)

    print("\nMerging complete!")
    print(f"Total files merged: {len(dfs)}")
    print(f"Total rows in merged file: {len(merged_df)}")
    print(f"Merged file saved to: {output_path}")
else:
    # pd.concat raises ValueError on an empty list; report the situation
    # instead, and leave any previously merged file untouched.
    merged_df = pd.DataFrame()
    print(f"\nNo readable CSV files found in {input_dir}; nothing was merged.")

Processed: ../Data\tweets_content.csv | Rows: 100
Processed: ../Data\tweet_content2.csv | Rows: 100
Processed: ../Data\tweet_content3.csv | Rows: 3
Processed: ../Data\tweet_content4.csv | Rows: 100
Processed: ../Data\tweet_content5.csv | Rows: 38
Processed: ../Data\tweet_content6.csv | Rows: 100
Processed: ../Data\tweet_details.csv | Rows: 99

Merging complete!
Total files merged: 7
Total rows in merged file: 540
Merged file saved to: ../Data\merged_data.csv
