In [None]:
import tweepy
import time

# Liste des tokens pour contourner les limites
tokens = ["TOKEN1", "TOKEN2", "TOKEN3"]
clients = [tweepy.Client(bearer_token=token) for token in tokens]
current_client_index = 0

def get_client():
    """Retourne le client actuel en alternant les tokens."""
    global current_client_index
    client = clients[current_client_index]
    current_client_index = (current_client_index + 1) % len(clients)
    return client

def fetch_tweets(query, max_results=100):
    """Extrait des tweets avec les informations des auteurs."""
    client = get_client()
    tweet_data = []

    try:
        # Requête pour les tweets récents
        tweets = client.search_recent_tweets(
            query=query,
            tweet_fields=["created_at", "author_id", "public_metrics", "text", "conversation_id", "entities"],
            user_fields=["id", "name", "username", "location", "verified", "description", "public_metrics", "created_at"],
            expansions=["author_id"],  # Inclut les données des utilisateurs
            max_results=max_results
        )

        if tweets.data:
            # Crée un dictionnaire des utilisateurs
            users = {user["id"]: user for user in tweets.includes.get("users", [])}

            for tweet in tweets.data:
                author_info = users.get(tweet.author_id, {})
                tweet_info = {
                    "tweet_id": tweet.id,
                    "tweet_text": tweet.text,
                    "tweet_date": tweet.created_at,
                    "nbr_likes": tweet.public_metrics["like_count"],
                    "nbr_retweets": tweet.public_metrics["retweet_count"],
                    "nbr_replies": tweet.public_metrics["reply_count"],
                    "conversation_id": tweet.conversation_id,
                    "hashtags": [h["tag"] for h in tweet.entities.get("hashtags", [])] if tweet.entities else [],
                    "mentions": [m["username"] for m in tweet.entities.get("mentions", [])] if tweet.entities else [],
                    "author_id": tweet.author_id,
                    "author_name": author_info.get("name"),
                    "author_username": author_info.get("username"),
                    "author_verified": author_info.get("verified"),
                    "author_followers": author_info.get("public_metrics", {}).get("followers_count"),
                    "author_following": author_info.get("public_metrics", {}).get("following_count"),
                    "author_tweets_count": author_info.get("public_metrics", {}).get("tweet_count"),
                    "author_creation_date": author_info.get("created_at"),
                    "author_location": author_info.get("location"),
                    "author_bio": author_info.get("description"),
                }
                tweet_data.append(tweet_info)

    except tweepy.TooManyRequests:
        print("Limite atteinte, pause de 15 minutes...")
        time.sleep(15 * 60)
        return fetch_tweets(query, max_results)
    except Exception as e:
        print(f"Erreur lors de l'extraction : {e}")

    return tweet_data

if __name__ == "__main__":
    query = "palestine"
    all_tweets = []

    for _ in range(10):  # Nombre de cycles d'extraction
        tweets = fetch_tweets(query)
        all_tweets.extend(tweets)
        print(f"Extrait {len(tweets)} tweets.")
        time.sleep(5)  # Pause pour limiter la vitesse des requêtes

    # Sauvegarde des données au format JSON
    import json
    with open("tweets_data.json", "w", encoding="utf-8") as file:
        json.dump(all_tweets, file, ensure_ascii=False, indent=4)

    print(f"Extraction terminée. {len(all_tweets)} tweets sauvegardés.")


Extrait 100 tweets.
Limite atteinte, pause de 15 minutes...
Extrait 99 tweets.
Extrait 99 tweets.
Limite atteinte, pause de 15 minutes...
Extrait 100 tweets.
Limite atteinte, pause de 15 minutes...


KeyboardInterrupt: 

In [2]:
print(tweets)

[{'tweet_id': 1863599925746872746, 'tweet_text': 'RT @mrubin1971: "Biafra? Catalonia? Kurdistan? U.S. Shouldn’t Accept Hypocrisy from Countries that Recognized Palestine"\nhttps://t.co/NIOqm…', 'tweet_date': datetime.datetime(2024, 12, 2, 15, 3, 40, tzinfo=datetime.timezone.utc), 'nbr_likes': 0, 'nbr_retweets': 1155, 'nbr_replies': 0, 'conversation_id': 1863599925746872746, 'hashtags': [], 'mentions': ['mrubin1971'], 'author_id': 1566456222873690117, 'author_name': 'henry nwokolo', 'author_username': 'hennfranky3k', 'author_verified': False, 'author_followers': 158, 'author_following': 434, 'author_tweets_count': 2070, 'author_creation_date': datetime.datetime(2022, 9, 4, 16, 7, 5, tzinfo=datetime.timezone.utc), 'author_location': None, 'author_bio': "I don't take nonsens"}, {'tweet_id': 1863599924304052529, 'tweet_text': 'RT @mhdksafa: Child deaths in wars:\n\nIraq: 3,100 in 14 years.\nSyria: 12,000 in 11 years.\nYemen: 3,700 in 7 years.\n\nUkraine:  +520 in 2 year…', 'tweet_date': da

In [4]:
print(len(all_tweets))

398


In [6]:
import pandas as pd

# Assuming all_tweets is a list of dictionaries with tweet data (e.g. [{'tweet': 'some text', 'sentiment': 'positive', 'sentiment_score': 0.98}, ...])
# Convert the list of dictionaries to a DataFrame
all_tweets_df = pd.DataFrame(all_tweets)

# Specify the file path where you want to save the CSV
file_path = "save_tweets.csv"

# Save the DataFrame to a CSV file
all_tweets_df.to_csv(file_path, index=False)

print(f"CSV file saved at: {file_path}")



CSV file saved at: save_tweets.csv
