In [140]:
import tweepy 
import config
import pandas as pd
import tweepy
import re
import spacy
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from textblob import TextBlob

# 1 - Collect des tweets

In [None]:
client = tweepy.Client(bearer_token=config.api_key)

# Search query with keywords and language filter
query = "#F1 redbull (lang:fr OR lang:en) -is:retweet -is:reply" 
max_results = 10  

tweets = client.search_recent_tweets(query=query, tweet_fields=['text'], max_results=max_results)

# Tweets conversion in Pandas DataFrame
tweet_data = []
for tweet in tweets.data:
    tweet_data.append({
        "text": tweet.text
    })
df = pd.DataFrame(tweet_data)

# Saving the DataFrame as a CSV file
df.to_csv("data/tweets.csv", index=False)
print("Données enregistrées dans tweets.csv")

In [146]:
path = 'data/'
df = pd.read_csv(path + 'tweets.csv')
pd.set_option("max_colwidth", None)

In [147]:
df.head(10)

Unnamed: 0,text
0,Formula 1 is bringing the heat to Miami Grand Prix 🔥\n\n🏎️ Can Max Verstappen win again?\n🏎️ Will the new upgrades for McLaren and Mercedes work?\n\nRead our insights OUT NOW!\n\n#F1 #Formula1 #RedBull #Ferrari #CL16 #LewisHamilton #LandoNorris #MiamiGP\n\nhttps://t.co/tNar0iwflY
1,Formula 1 is bringing the heat to Miami Grand Prix 🔥\n\nRead our insights and predictions OUT NOW! 🏎️\n\n#F1 #Formula1 #RedBull #MaxVerstappen #Ferrari #CL16 #Mercedes #LewisHamilton #McLaren #LandoNorris #MiamiGP\n\nhttps://t.co/ZsuTm6aT5J
2,The Miami Grand Prix is almost here and all teams are getting ready for a Sprint Weekend 🏎️🔥\n\nRead our insights for this weeks Formula 1 race \n\n#F1 #Formula1 #RedBull #MaxVerstappen #Ferrari #CL16 #Mercedes #LewisHamilton #McLaren #LandoNorris #MiamiGP\n\nhttps://t.co/39MnA80P6v
3,The Miami Grand Prix is almost here and all teams are getting ready for a Sprint Weekend 🏎️🔥\n\nRead our insights for this weeks Formula 1 race \n\n#F1 #Formula1 #RedBull #MaxVerstappen #Ferrari #CL16 #Mercedes #LewisHamilton #McLaren #LandoNorris #MiamiGP\n\nhttps://t.co/gfXIdIJ1AB
4,🎙️#Perez: “We're having a tremendous s eason again. The future looks bright. The #RedBull organization remains strong” \n\n(#F1 press conference) \n\n#MiamiGP https://t.co/h7atbf6sTv
5,"🎙️#Perez: “#Newey? It isn't ideal when someone like Adrian leaves. He spent 20 years at #RedBull, he probably he wants to do something else. Red Bull has a very strong organiziation. It isn't down to one individual. #Horner is ready” \n\n(#F1 press conference) \n\n#MiamiGP https://t.co/t8c5Ltl6Oc"
6,"#F1 2016, #RedBull TAG Heuer RB12 V6-Turbo, Max Verstappen. The beginning of something special #MiamiGP https://t.co/toAxZecQJ5"
7,Here we go #F1 #AdrianNewey #RedBull\n\nhttps://t.co/X8w2kwuGlE
8,"Here are the different RW configurations brought by the teams. \n\n#Ferrari It remains with the same configuration, it will be worth seeing the incidence of BW! \n\n#RedBull Excellent efficiency of DRS + little drag \n\n(📸 @AlbertFabrega)\n#f1 #formula1 #MiamiGP https://t.co/7sVMxeBFEa"
9,Max Verstappen chez Mercedes !!!\n\nDes sources autrichiennes affirment que l' équipe Mercedes préparerai une Offre\nde 128 millions de livres sterling par an .\nPrès de 150M€… \nà la suite sur Fb F1access\n#maxverstappen #F1 #RedBull #MercedesBenz #MiamiGP https://t.co/L0NaFJQDVa


# 2 - Nettoyage des tweets

## 2.1 Fonctions

In [148]:
# NETTOYAGE DES TWEETS
def clean_tweet(tweet):
    """
    Nettoie un tweet en supprimant les URL, les mentions, les hashtags et les caractères spéciaux.
    """
    # Supprimer les URL
    tweet = re.sub(r'http\S+', '', tweet)  
    # Supprimer les mentions
    tweet = re.sub(r'@\w+', '', tweet)     
    # Supprimer les hashtags
    tweet = re.sub(r'#\w+', '', tweet)     
    # Supprimer les caractères spéciaux
    tweet = re.sub(r"[^a-zA-Z0-9À-ÿ\s]", "", tweet)
    return tweet.strip()

def lower_start_fct(tweet) :
    return tweet.lower()

def lemma_fct(tweet):
    """
    Lemmatise les mots d'un tweet en utilisant le lemmatiseur de spaCy.
    """
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(tweet)
    return ' '.join([token.lemma_ for token in doc])

# TOKENISATION
def tokenize_tweet(tweet):
    return word_tokenize(tweet)

# FILTRAGE
# Filtrage des stop-words : mots inutiles
def remove_stopwords(tokens):
    """
    Supprime du tweet tous les stop-words.
    """
    stop_words = stopwords.words('french') + stopwords.words('english')
    filtered_tweets = []
    for token in tokens:
        if token not in stop_words:
            filtered_tweets.append(token)
    return filtered_tweets

## 2.2 - Test du nettoyage, de la tokenization et du filtrage

### 2.2.1 - Nettoyage

In [149]:
df['text_cleaned'] = df['text'].apply(clean_tweet)
df['text_cleaned'] = df['text_cleaned'].apply(lower_start_fct)
df.head()

Unnamed: 0,text,text_cleaned
0,Formula 1 is bringing the heat to Miami Grand Prix 🔥\n\n🏎️ Can Max Verstappen win again?\n🏎️ Will the new upgrades for McLaren and Mercedes work?\n\nRead our insights OUT NOW!\n\n#F1 #Formula1 #RedBull #Ferrari #CL16 #LewisHamilton #LandoNorris #MiamiGP\n\nhttps://t.co/tNar0iwflY,formula 1 is bringing the heat to miami grand prix \n\n can max verstappen win again\n will the new upgrades for mclaren and mercedes work\n\nread our insights out now
1,Formula 1 is bringing the heat to Miami Grand Prix 🔥\n\nRead our insights and predictions OUT NOW! 🏎️\n\n#F1 #Formula1 #RedBull #MaxVerstappen #Ferrari #CL16 #Mercedes #LewisHamilton #McLaren #LandoNorris #MiamiGP\n\nhttps://t.co/ZsuTm6aT5J,formula 1 is bringing the heat to miami grand prix \n\nread our insights and predictions out now
2,The Miami Grand Prix is almost here and all teams are getting ready for a Sprint Weekend 🏎️🔥\n\nRead our insights for this weeks Formula 1 race \n\n#F1 #Formula1 #RedBull #MaxVerstappen #Ferrari #CL16 #Mercedes #LewisHamilton #McLaren #LandoNorris #MiamiGP\n\nhttps://t.co/39MnA80P6v,the miami grand prix is almost here and all teams are getting ready for a sprint weekend \n\nread our insights for this weeks formula 1 race
3,The Miami Grand Prix is almost here and all teams are getting ready for a Sprint Weekend 🏎️🔥\n\nRead our insights for this weeks Formula 1 race \n\n#F1 #Formula1 #RedBull #MaxVerstappen #Ferrari #CL16 #Mercedes #LewisHamilton #McLaren #LandoNorris #MiamiGP\n\nhttps://t.co/gfXIdIJ1AB,the miami grand prix is almost here and all teams are getting ready for a sprint weekend \n\nread our insights for this weeks formula 1 race
4,🎙️#Perez: “We're having a tremendous s eason again. The future looks bright. The #RedBull organization remains strong” \n\n(#F1 press conference) \n\n#MiamiGP https://t.co/h7atbf6sTv,were having a tremendous s eason again the future looks bright the organization remains strong \n\n press conference


In [150]:
df['tweet_lemmatized'] = df['text_cleaned'].apply(lemma_fct)
df['tweet_lemmatized'].head() 

0    formula 1 be bring the heat to miami grand prix \n\n  can max verstappen win again \n  will the new upgrade for mclaren and mercede work \n\n read our insight out now
1                                                                              formula 1 be bring the heat to miami grand prix \n\n read our insight and prediction out now
2                                     the miami grand prix be almost here and all team be get ready for a sprint weekend \n\n read our insight for this week formula 1 race
3                                     the miami grand prix be almost here and all team be get ready for a sprint weekend \n\n read our insight for this week formula 1 race
4                                                      be have a tremendous s eason again the future look bright the   organization remain strong    \n\n  press conference
Name: tweet_lemmatized, dtype: object

### 2.2.2 - Tokenization

In [157]:
# Tokenisation 
df['tokens'] = df['tweet_lemmatized'].apply(tokenize_tweet)

# Garde des mots uniques dans chaque liste
df['tokens'] = df['tokens'].apply(lambda x: list(set(x)))
df['tokens'].head()

0    [prix, mclaren, for, be, bring, work, grand, can, mercede, miami, to, upgrade, now, and, again, insight, out, will, heat, verstappen, formula, read, the, max, 1, new, our, win]
1                                                                           [formula, prediction, read, prix, the, miami, be, bring, 1, to, grand, our, and, insight, out, now, heat]
2                                  [get, prix, for, be, grand, miami, team, ready, and, almost, this, weekend, insight, a, race, sprint, formula, read, here, the, week, all, our, 1]
3                                  [get, prix, for, be, grand, miami, team, ready, and, almost, this, weekend, insight, a, race, sprint, formula, read, here, the, week, all, our, 1]
4                                                              [look, again, tremendous, future, bright, remain, strong, press, the, organization, have, s, be, conference, a, eason]
Name: tokens, dtype: object

### 2.2.3 - Filtre Stopwords

In [156]:
df['filtered_tokens'] = df['tokens'].apply(remove_stopwords)
df.head(10)

Unnamed: 0,text,text_cleaned,tweet_lemmatized,tokens,filtered_tokens
0,Formula 1 is bringing the heat to Miami Grand Prix 🔥\n\n🏎️ Can Max Verstappen win again?\n🏎️ Will the new upgrades for McLaren and Mercedes work?\n\nRead our insights OUT NOW!\n\n#F1 #Formula1 #RedBull #Ferrari #CL16 #LewisHamilton #LandoNorris #MiamiGP\n\nhttps://t.co/tNar0iwflY,formula 1 is bringing the heat to miami grand prix \n\n can max verstappen win again\n will the new upgrades for mclaren and mercedes work\n\nread our insights out now,formula 1 be bring the heat to miami grand prix \n\n can max verstappen win again \n will the new upgrade for mclaren and mercede work \n\n read our insight out now,"[prix, mclaren, for, be, bring, work, grand, can, mercede, miami, to, upgrade, now, and, again, insight, out, will, heat, verstappen, formula, read, the, max, 1, new, our, win]","[prix, mclaren, bring, work, grand, mercede, miami, upgrade, insight, heat, verstappen, formula, read, max, 1, new, win]"
1,Formula 1 is bringing the heat to Miami Grand Prix 🔥\n\nRead our insights and predictions OUT NOW! 🏎️\n\n#F1 #Formula1 #RedBull #MaxVerstappen #Ferrari #CL16 #Mercedes #LewisHamilton #McLaren #LandoNorris #MiamiGP\n\nhttps://t.co/ZsuTm6aT5J,formula 1 is bringing the heat to miami grand prix \n\nread our insights and predictions out now,formula 1 be bring the heat to miami grand prix \n\n read our insight and prediction out now,"[formula, prediction, read, prix, and, the, miami, be, bring, 1, to, grand, our, insight, out, now, heat]","[formula, prediction, read, prix, miami, bring, 1, grand, insight, heat]"
2,The Miami Grand Prix is almost here and all teams are getting ready for a Sprint Weekend 🏎️🔥\n\nRead our insights for this weeks Formula 1 race \n\n#F1 #Formula1 #RedBull #MaxVerstappen #Ferrari #CL16 #Mercedes #LewisHamilton #McLaren #LandoNorris #MiamiGP\n\nhttps://t.co/39MnA80P6v,the miami grand prix is almost here and all teams are getting ready for a sprint weekend \n\nread our insights for this weeks formula 1 race,the miami grand prix be almost here and all team be get ready for a sprint weekend \n\n read our insight for this week formula 1 race,"[get, prix, for, be, grand, miami, team, ready, and, almost, this, weekend, insight, a, race, sprint, formula, read, here, the, week, all, our, 1]","[get, prix, grand, miami, team, ready, almost, weekend, insight, race, sprint, formula, read, week, 1]"
3,The Miami Grand Prix is almost here and all teams are getting ready for a Sprint Weekend 🏎️🔥\n\nRead our insights for this weeks Formula 1 race \n\n#F1 #Formula1 #RedBull #MaxVerstappen #Ferrari #CL16 #Mercedes #LewisHamilton #McLaren #LandoNorris #MiamiGP\n\nhttps://t.co/gfXIdIJ1AB,the miami grand prix is almost here and all teams are getting ready for a sprint weekend \n\nread our insights for this weeks formula 1 race,the miami grand prix be almost here and all team be get ready for a sprint weekend \n\n read our insight for this week formula 1 race,"[get, prix, for, be, grand, miami, team, ready, and, almost, this, weekend, insight, a, race, sprint, formula, read, here, the, week, all, our, 1]","[get, prix, grand, miami, team, ready, almost, weekend, insight, race, sprint, formula, read, week, 1]"
4,🎙️#Perez: “We're having a tremendous s eason again. The future looks bright. The #RedBull organization remains strong” \n\n(#F1 press conference) \n\n#MiamiGP https://t.co/h7atbf6sTv,were having a tremendous s eason again the future looks bright the organization remains strong \n\n press conference,be have a tremendous s eason again the future look bright the organization remain strong \n\n press conference,"[again, tremendous, look, future, bright, remain, strong, press, the, organization, have, s, be, conference, a, eason]","[tremendous, look, future, bright, remain, strong, press, organization, conference, eason]"
5,"🎙️#Perez: “#Newey? It isn't ideal when someone like Adrian leaves. He spent 20 years at #RedBull, he probably he wants to do something else. Red Bull has a very strong organiziation. It isn't down to one individual. #Horner is ready” \n\n(#F1 press conference) \n\n#MiamiGP https://t.co/t8c5Ltl6Oc",it isnt ideal when someone like adrian leaves he spent 20 years at he probably he wants to do something else red bull has a very strong organiziation it isnt down to one individual is ready \n\n press conference,it be not ideal when someone like adrian leave he spend 20 year at he probably he want to do something else red bull have a very strong organiziation it be not down to one individual be ready \n\n press conference,"[year, one, adrian, down, want, be, bull, probably, red, something, else, individual, at, he, strong, it, to, ready, like, leave, have, when, a, very, do, press, ideal, 20, not, spend, conference, someone, organiziation]","[year, one, adrian, want, bull, probably, red, something, else, individual, strong, ready, like, leave, press, ideal, 20, spend, conference, someone, organiziation]"
6,"#F1 2016, #RedBull TAG Heuer RB12 V6-Turbo, Max Verstappen. The beginning of something special #MiamiGP https://t.co/toAxZecQJ5",2016 tag heuer rb12 v6turbo max verstappen the beginning of something special,2016 tag heuer rb12 v6turbo max verstappen the beginning of something special,"[of, something, heuer, rb12, the, v6turbo, special, max, 2016, beginning, tag, verstappen]","[something, heuer, rb12, v6turbo, special, max, 2016, beginning, tag, verstappen]"
7,Here we go #F1 #AdrianNewey #RedBull\n\nhttps://t.co/X8w2kwuGlE,here we go,here we go,"[go, here, we]",[go]
8,"Here are the different RW configurations brought by the teams. \n\n#Ferrari It remains with the same configuration, it will be worth seeing the incidence of BW! \n\n#RedBull Excellent efficiency of DRS + little drag \n\n(📸 @AlbertFabrega)\n#f1 #formula1 #MiamiGP https://t.co/7sVMxeBFEa",here are the different rw configurations brought by the teams \n\n it remains with the same configuration it will be worth seeing the incidence of bw \n\n excellent efficiency of drs little drag,here be the different rw configuration bring by the team \n\n it remain with the same configuration it will be worth see the incidence of bw \n\n excellent efficiency of drs little drag,"[little, be, bring, see, worth, efficiency, same, of, configuration, by, remain, it, bw, team, different, with, drs, rw, will, excellent, here, the, incidence, drag]","[little, bring, see, worth, efficiency, configuration, remain, bw, team, different, drs, rw, excellent, incidence, drag]"
9,Max Verstappen chez Mercedes !!!\n\nDes sources autrichiennes affirment que l' équipe Mercedes préparerai une Offre\nde 128 millions de livres sterling par an .\nPrès de 150M€… \nà la suite sur Fb F1access\n#maxverstappen #F1 #RedBull #MercedesBenz #MiamiGP https://t.co/L0NaFJQDVa,max verstappen chez mercedes \n\ndes sources autrichiennes affirment que l équipe mercedes préparerai une offre\nde 128 millions de livres sterling par an \nprès de 150m \nà la suite sur fb f1access,max verstappen chez mercede \n\n des source autrichienne affirment que l équipe mercede préparerai une offre \n de 128 million de livre sterling par an \n près de 150 m \n à la suite sur fb f1access,"[source, chez, préparerai, des, livre, f1access, sur, mercede, l, la, à, million, autrichienne, m, affirment, 128, près, sterling, 150, offre, suite, fb, équipe, de, verstappen, max, que, an, une, par]","[source, chez, préparerai, livre, f1access, mercede, million, autrichienne, affirment, 128, près, sterling, 150, offre, suite, fb, équipe, verstappen, max]"


In [158]:
# Saving the DataFrame as a CSV file
df.to_csv("data/filtered_tweets.csv", index=False)
print("Données enregistrées dans filtered_tweets.csv")

Données enregistrées dans filtered_tweets.csv
