In [None]:
import os

import pandas as pd
import tweepy

# Read the credential from the environment so it is never saved in the notebook.
# Falls back to an empty string when the variable is unset.
BEARER_TOKEN = os.environ.get("TWITTER_BEARER_TOKEN", "")

# English, non-retweet tweets mentioning any of the Bitcoin hashtags.
SEARCH_QUERY = "(#bitcoin OR #Bitcoin OR #BTC OR #btc) -is:retweet lang:en"

client = tweepy.Client(bearer_token=BEARER_TOKEN)

response = client.search_recent_tweets(SEARCH_QUERY, max_results=100)
tweets = response.data
# `data` is None when the search matches nothing; fail with a clear message
# instead of an opaque "NoneType is not iterable" error further down.
if not tweets:
    raise RuntimeError(
        "The search returned no tweets; check the query and the bearer token."
    )

# Each `tweet.data` is the raw payload dict; drop the identifier fields that
# the analysis below does not use.
tweets_df = pd.DataFrame([tweet.data for tweet in tweets]).drop(
    columns=["id", "edit_history_tweet_ids"]
)
tweets_df.head()

In [None]:
import re

# Compiled once when the cell runs instead of once per tweet.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # regional indicator symbols (flags)
    "\U00002500-\U00002BEF"  # box drawing, geometric shapes, misc symbols, arrows
    "\U00002702-\U000027B0"  # dingbats
    "\U000024C2-\U0001F251"  # NOTE: very broad; also spans CJK and other non-emoji scripts
    "\U0001f926-\U0001f937"
    "\U00010000-\U0010ffff"  # NOTE: every supplementary-plane character
    "\u2640-\u2642"
    "\u2600-\u2B55"
    "\u200d"  # zero-width joiner
    "\u23cf"
    "\u23e9"
    "\u231a"
    "\ufe0f"  # variation selector-16
    "\u3030"
    "]+",
    flags=re.UNICODE,
)


def clean_tweet_dataframe(raw_df: pd.DataFrame, column: str = "text") -> pd.DataFrame:
    """Clean every tweet stored in one column of a dataframe.

    Call this once with the whole dataframe; do not pass it to
    ``Series.apply``, which would hand it individual strings.

    Args:
        raw_df (pd.DataFrame): The dataframe to clean. It is modified in place.
        column (str): The column containing the tweets. Defaults to "text".

    Returns:
        pd.DataFrame: The same ``raw_df`` object, with ``column`` replaced by
            the cleaned, lowercased text.

    Raises:
        KeyError: If ``column`` is not a column of ``raw_df``.
    """

    def clean_tweet(txt: str) -> str:
        """Strip hashtag/mention symbols, URLs and emojis, and lowercase the text.

        Args:
            txt (str): The input string to be cleaned. Non-string values are
                converted with ``str()`` first.

        Returns:
            str: The cleaned version of the input string.
        """
        txt = str(txt)
        txt = txt.replace("#", "")
        # Line breaks become a single space rather than being deleted.
        # Deleting them glued the words on either side together, and a URL
        # followed by a newline swallowed the next word when URLs were removed.
        txt = re.sub(r"[\r\n]+", " ", txt)
        txt = re.sub(r"https?://\S+", "", txt)
        txt = txt.replace("@", "")
        return _EMOJI_PATTERN.sub("", txt).lower()

    raw_df[column] = raw_df[column].apply(clean_tweet)
    return raw_df


In [None]:
# clean_tweet_dataframe takes the whole DataFrame plus a column name. Passing it
# to Series.apply would hand it one string at a time and fail on `raw_df[column]`.
tweets_df = clean_tweet_dataframe(tweets_df, column="text")
tweets_df.head(10)

# Afinn

In [None]:
from afinn import Afinn

afinn = Afinn()  # one scorer with default settings, reused for every tweet

def classify_tweet_afinn(tweet: str) -> str:
    """Label a tweet from the sign of its Afinn score.

    Args:
        tweet (str): The tweet text.

    Returns:
        str: "Negative" when the Afinn score is below zero, otherwise
            "Positive" (a score of exactly zero is also labelled "Positive").
    """
    if afinn.score(tweet) < 0:
        return "Negative"
    return "Positive"

# Label every tweet with the Afinn-based classifier, then preview the frame.
tweets_df["afinn"] = tweets_df["text"].apply(classify_tweet_afinn)
tweets_df.head(15)

# TextBlob

## Polarity

In [None]:
from textblob import TextBlob

def classify_tweet_textblob(tweet: str) -> str:
    """Label a tweet from the sign of its TextBlob polarity.

    Args:
        tweet (str): The tweet text.

    Returns:
        str: "Negative" when the polarity is below zero, otherwise
            "Positive" (a polarity of exactly zero is also labelled "Positive").
    """
    polarity = TextBlob(tweet).sentiment.polarity
    if polarity < 0:
        return "Negative"
    return "Positive"

# Use the TextBlob classifier here; the Afinn one would only duplicate the
# existing 'afinn' column.
tweets_df['textblob'] = tweets_df.text.apply(classify_tweet_textblob)
tweets_df.head(15)

## Subjectivity

In [None]:
def getSubjectivity(txt):
    """Return the subjectivity score TextBlob reports for a piece of text.

    Args:
        txt: The text to analyse; it is passed straight to ``TextBlob``.

    Returns:
        The ``subjectivity`` field of TextBlob's sentiment result.
    """
    sentiment = TextBlob(txt).sentiment
    return sentiment.subjectivity

# Score every tweet and store the result in a new column.
tweets_df["subjectivity"] = tweets_df["text"].apply(getSubjectivity)