In [None]:
%pip install tweepy
%pip install googletrans==4.0.0-rc1

In [2]:
import csv
import json
import os
from datetime import datetime

import requests
import tweepy
from dateutil.relativedelta import relativedelta
from googletrans import Translator

In [4]:
def translate_text(text, target_language="en"):
    """Translate ``text`` into ``target_language`` using googletrans.

    Parameters:
        text(string): The text to be translated.
        target_language(string, optional): Language code to translate into.
            Defaults to english-"en".

    Returns:
        string : The text translated into ``target_language``. ``None`` yields an
        empty string, and blank / whitespace-only text is returned unchanged.
    """
    # Blank input has nothing to translate, so answer locally instead of sending
    # it to the translation service.
    if text is None:
        return ""
    if isinstance(text, str) and not text.strip():
        return text

    translator = Translator()  # a fresh googletrans client for this call
    result = translator.translate(text, dest=target_language)
    return result.text

def bearer_oauth(r):
    """Attach the bearer-token credentials to an outgoing request.

    Parameters:
        r : The request object whose ``headers`` mapping is filled in.

    Returns:
        The same request object, with the ``Authorization`` and ``User-Agent``
        headers set.
    """
    auth_headers = {
        # The token comes from the notebook-level ``bearer_token`` variable.
        "Authorization": f"Bearer {bearer_token}",
        "User-Agent": "v2UserLookupPython",
    }
    for header_name, header_value in auth_headers.items():
        r.headers[header_name] = header_value
    return r

def connect_to_endpoint(url, timeout=30):
    """Send an authenticated GET request to ``url`` and return the decoded JSON body.

    Parameters:
        url (string): The url of the endpoint.
        timeout (float or tuple, optional): Seconds to wait for the server before
            giving up; passed straight to ``requests``. Defaults to 30. Without a
            timeout a stalled connection would block the notebook indefinitely.

    Raises:
        Exception: If the request returns a status code other than 200. The
            message carries the status code and the response body.

    Returns:
        json : The response of the request in json format.
    """
    response = requests.request("GET", url, auth=bearer_oauth, timeout=timeout)

    # Anything other than 200 is treated as a failure and surfaced to the caller.
    if response.status_code != 200:
        raise Exception(
            "Request returned an error: {} {}".format(
                response.status_code, response.text
            )
        )

    return response.json()


narendramodi ==> 18839785


In [None]:
def Extract_tweet(username, total, n, date,
                  file_path=r'C:\Users\Mugun\Desktop\Twitter_data.csv'):
    """Extract a user's tweets from an n-month window and store them, translated, in a csv file.

    The window ends at ``date`` (taken as midnight UTC) and starts ``n`` months
    earlier. Each row of the csv holds the tweet id and the tweet text after it
    has been passed through ``translate_text``.

    Parameters:
        username(string): Username of the twitter account
        total(integer): Maximum number of tweets to be extracted
        n(integer): No.of months to go back from ``date``
        date(string): Most recent date of the extraction window (YYYY-MM-DD)
        file_path(string, optional): Where the csv file is written. Defaults to
            the location this notebook originally wrote to; pass a path that
            exists on your machine.

    returns:
        string: The status of the extraction process - "Invalid Username" when
        the lookup yields no user, otherwise "Data Extraction Successful".
    """
    # Look the account up by handle to obtain its numeric user id.
    usernames = "usernames=" + username
    user_fields = "user.fields=description,created_at,id,location,name,public_metrics,url,username,verified"
    url = "https://api.twitter.com/2/users/by?{}&{}".format(usernames, user_fields)
    json_response = connect_to_endpoint(url)

    # Inspect the parsed payload rather than its text dump: a substring test on
    # the dump also fires when e.g. the profile description contains "errors".
    users = json_response.get("data")
    if "errors" in json_response or not users:
        print("Invalid Username")
        return "Invalid Username"

    print(json.dumps(json_response, indent=4, sort_keys=True))
    user_id = users[0]['id']
    # Report the handle that was actually looked up (the parameter), not a
    # notebook-level variable that may or may not exist.
    print("{} ==> {}".format(username, user_id))

    print("Authentication Successful")

    # Create a client to access the Twitter API
    client = tweepy.Client(bearer_token=bearer_token)

    # strptime on a date-only string gives midnight; the trailing "Z" labels it UTC.
    recent_date = datetime.strptime(date, "%Y-%m-%d")
    old_date = recent_date - relativedelta(months=n)
    recent_formatted = recent_date.strftime("%Y-%m-%dT%H:%M:%SZ")
    old_formatted = old_date.strftime("%Y-%m-%dT%H:%M:%SZ")

    print("Recent Date: ", recent_formatted)
    print("Old Date: ", old_formatted)

    # Page sizing. The page size is kept within 5..100 (the range the user-timeline
    # endpoint accepts for max_results) and the page count is rounded UP; the
    # previous `total // 100` evaluated to 0 pages for any total below 100.
    wanted = max(total, 0)
    page_size = min(100, max(5, wanted))
    page_limit = -(-wanted // page_size)  # ceiling division

    header = ['Tweet_id', 'Text']  # Header of the csv file
    written = 0
    with open(file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        if page_limit:
            pages = tweepy.Paginator(client.get_users_tweets, id=user_id,
                                     max_results=page_size,
                                     tweet_fields="created_at",
                                     start_time=old_formatted,
                                     end_time=recent_formatted,
                                     limit=page_limit)
            for page in pages:
                # A page without tweets carries data=None; treat it as empty.
                # The slice trims the last page so at most `total` rows are written.
                for tweet in (page.data or [])[:wanted - written]:
                    writer.writerow([tweet.id, translate_text(tweet.text)])
                    written += 1
                if written >= wanted:
                    break

    print("Data Extraction Successful")
    return "Data Extraction Successful"

In [None]:
# The bearer token is a secret and must not be stored in the notebook: it is read
# from the TWITTER_BEARER_TOKEN environment variable instead. Any token that was
# previously committed here should be treated as leaked and regenerated.
bearer_token = os.environ.get("TWITTER_BEARER_TOKEN")
if not bearer_token:
    raise RuntimeError(
        "Set the TWITTER_BEARER_TOKEN environment variable before running this cell."
    )

# Extraction parameters: account handle, maximum number of tweets, how many
# months to look back, and the end date of the window (YYYY-MM-DD).
user = "narendramodi"
total = 5
n = 6
date = '2024-03-20'
Extract_tweet(user, total, n, date)