# Code to fetch Tata Motors tweets from RapidAPI and convert the JSON data to CSV format

In [2]:
pip install --upgrade numpy


Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import json
import os
import requests
from datetime import datetime, timedelta, timezone
from apscheduler.schedulers.blocking import BlockingScheduler

# RapidAPI tweet fetching function
def fetch_real_time_tweets():
    """Fetch the latest tweets matching "tatamotors" from the RapidAPI endpoint.

    Returns:
        The decoded JSON payload when the server answers with HTTP 200 and a
        JSON body; ``None`` on a non-200 status, a network error/timeout, or
        a body that cannot be decoded as JSON. A message is printed in every
        failure case.
    """
    # NOTE(review): the endpoint URL is blank in this file. requests rejects
    # an empty URL, so it must be filled in before a call can succeed.
    url = ""

    # SECURITY: an API key is a secret and should not live in source control.
    # The RAPIDAPI_KEY environment variable takes precedence; the literal is
    # kept only as a fallback so existing setups keep working. It should be
    # rotated and removed.
    headers = {
        "x-rapidapi-key": os.environ.get(
            "RAPIDAPI_KEY",
            "304de52c13msh8fe6603818f4064p18d63bjsn8d62023414ae",
        ),
        "x-rapidapi-host": "twitter-search-only.p.rapidapi.com",
    }

    querystring = {"query": "tatamotors", "search_type": "Latest"}

    try:
        # Without a timeout a stalled connection would block the caller
        # (and therefore the scheduler) indefinitely.
        response = requests.get(
            url, headers=headers, params=querystring, timeout=30
        )
    except requests.RequestException as exc:
        print(f"Error: Unable to fetch tweets ({exc})")
        return None

    if response.status_code != 200:
        print(f"Error: Unable to fetch tweets (status code: {response.status_code})")
        return None

    try:
        return response.json()  # Tweet data decoded from the JSON body
    except ValueError as exc:
        # A 200 response whose body is not valid JSON.
        print(f"Error: Unable to decode tweet response as JSON ({exc})")
        return None

# Function to filter tweets from the last 5 days and present day
def filter_recent_tweets(tweets, days=5):
    """Return the tweets created within the last ``days`` days (up to now).

    Args:
        tweets: Iterable of tweet mappings. Each usable entry has a
            ``'created_at'`` string in the form
            ``'%a %b %d %H:%M:%S %z %Y'`` (e.g. ``Wed Oct 10 20:19:24 +0000 2018``).
        days: Size of the look-back window in days. Defaults to 5.

    Returns:
        A list with the entries whose ``created_at`` lies between
        ``now - days`` and ``now`` (both inclusive, compared in UTC), in
        their original order. Entries with a missing, non-string, or
        unparseable ``created_at`` are skipped instead of aborting the batch.
    """
    # Timezone-aware bounds so they compare cleanly with the parsed,
    # offset-carrying timestamps.
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(days=days)

    filtered_tweets = []
    for tweet in tweets:
        try:
            created_at = datetime.strptime(
                tweet['created_at'], '%a %b %d %H:%M:%S %z %Y'
            )
        except (KeyError, TypeError, ValueError):
            # KeyError: no 'created_at'; TypeError: entry is not a mapping or
            # the value is not a string; ValueError: unexpected date format.
            continue
        if start_date <= created_at <= end_date:
            filtered_tweets.append(tweet)

    return filtered_tweets

# Function to process the incoming JSON data and store in CSV
def _read_csv_header(path):
    """Return the column names of the CSV at ``path``, or None if it has no header."""
    if not os.path.isfile(path):
        return None
    try:
        return list(pd.read_csv(path, nrows=0).columns)
    except pd.errors.EmptyDataError:
        # File exists but is completely empty: treat it as a fresh file.
        return None


def process_tweet_data(json_data):
    """Filter the tweets in ``json_data`` and store them in 'tweets_tatamotor.csv'.

    Args:
        json_data: Mapping with a ``"timeline"`` key holding the list of
            tweets, as returned by the fetch step.

    Returns:
        None. Prints a status message describing what happened.

    The tweets are filtered with ``filter_recent_tweets``, flattened with
    ``pandas.json_normalize`` and written to the CSV. When the file already
    has a header, new rows are aligned to it so values never land under the
    wrong column; if the new data introduces columns the file does not have,
    the file is rewritten with the union of columns.
    """
    # Extract the 'timeline' key which contains the tweets
    tweets = json_data.get("timeline", [])

    if not tweets:
        print("No tweets found in the data.")
        return

    recent_tweets = filter_recent_tweets(tweets)

    # Flatten nested tweet fields into dotted column names
    df = pd.json_normalize(recent_tweets)

    if df.empty:
        print("No recent tweets to save.")
        return

    csv_path = 'tweets_tatamotor.csv'
    existing_columns = _read_csv_header(csv_path)

    if existing_columns is None:
        # No usable file yet: create it with a header row.
        df.to_csv(csv_path, mode='w', header=True, index=False)
    elif set(df.columns) <= set(existing_columns):
        # Same (or fewer) fields: append in the file's column order so each
        # value stays under its own header.
        df.reindex(columns=existing_columns).to_csv(
            csv_path, mode='a', header=False, index=False
        )
    else:
        # New fields appeared: a headerless append would misalign rows, so
        # merge with the stored rows and rewrite with the column union.
        # Stored values are read back as plain strings to avoid re-typing them.
        existing = pd.read_csv(csv_path, dtype=str, keep_default_na=False)
        merged = pd.concat([existing, df], ignore_index=True, sort=False)
        merged.to_csv(csv_path, mode='w', header=True, index=False)

    print("Tweet data successfully saved to CSV.")

# Function to fetch and process real-time tweets
def fetch_and_process_tweets():
    """Run one cycle: fetch tweets from RapidAPI and, if any data came back, store it."""
    payload = fetch_real_time_tweets()

    # Nothing usable was fetched (None or an empty payload): skip processing.
    if not payload:
        return

    process_tweet_data(payload)

# Scheduler setup
if __name__ == "__main__":
    # Register the recurring job first; it only starts firing once
    # start() is called below.
    job_scheduler = BlockingScheduler()
    job_scheduler.add_job(fetch_and_process_tweets, trigger='interval', hours=4)

    print("Scheduler started. Fetching tweets every 4 hours.")

    # Do one run right away instead of waiting for the first interval.
    fetch_and_process_tweets()

    # Blocks this thread and runs the job every 4 hours from here on.
    job_scheduler.start()


Scheduler started. Fetching tweets every 4 hours.
Tweet data successfully saved to CSV.
