In [3]:
import tweepy
import pandas as pd
import re
import os
from dotenv import load_dotenv

load_dotenv('../.env')

True

In [4]:

client = tweepy.Client(bearer_token=os.environ.get("TWITTER_TOKEN"))

tweets = client.search_recent_tweets("(#bitcoin OR #Bitcoin OR #BTC OR #btc) -is:retweet lang:en",max_results=100,).data
tweets_df = pd.DataFrame([tweet.data for tweet in tweets])
tweets_df.drop(columns=['id','edit_history_tweet_ids'],inplace=True)
tweets_df.head()

Unnamed: 0,text
0,@RealCompounding @TheMoonCarl CHILD SUPPORT\n✅...
1,@Econ101_author @bitcoin__help @adam3us @Block...
2,"#Dogecoin is under hammer, its hard for doge t..."
3,$BTCUSD Buys from this morning are running.. #...
4,@cryptocharts CHILD SUPPORT\n✅CEX’s listing in...


In [5]:


def clean_tweet_dataframe(raw_df: pd.DataFrame, column: str = "text") -> pd.DataFrame:
    """Function that will clean an entire column of a dataframe.

    Args:
        raw_df (pd.DataFrame): The dataframe to clean.
        column (str): The column containing the tweets. Default to text.

    Returns:
        pd.DataFrame: _description_
    """

    def clean_tweet(txt: str) -> str:
        """This function takes a string as input and returns a cleaned version of the
        string by removing hashtags, newlines, URLs, @ symbols, and emojis. It also
        converts the string to lowercase.

        Args:
            txt (str): The input string to be cleaned.

        Returns:
            str: The cleaned version of the input string.
        """
        txt = str(txt)
        txt = re.sub(r"#", "", txt)
        txt = re.sub(r"\n", "", txt)
        txt = re.sub(r"https?:\/\/\S+", "", txt)
        txt = re.sub(r"@", "", txt)
        emoji_pattern = re.compile(
            "["
            "\U0001F600-\U0001F64F"  # emoticons
            "\U0001F300-\U0001F5FF"  # symbols & pictographs
            "\U0001F680-\U0001F6FF"  # transport & map symbols
            "\U0001F1E0-\U0001F1FF"  # flags (iOS)
            "\U00002500-\U00002BEF"  # chinese char
            "\U00002702-\U000027B0"
            "\U00002702-\U000027B0"
            "\U000024C2-\U0001F251"
            "\U0001f926-\U0001f937"
            "\U00010000-\U0010ffff"
            "\u2640-\u2642"
            "\u2600-\u2B55"
            "\u200d"
            "\u23cf"
            "\u23e9"
            "\u231a"
            "\ufe0f"  # dingbats
            "\u3030"
            "]+",
            flags=re.UNICODE,
        )

        return emoji_pattern.sub(r"", txt).lower()

    raw_df[column] = raw_df[column].apply(clean_tweet)
    return raw_df


In [7]:
tweets_df = clean_tweet_dataframe(tweets_df)
tweets_df.head(10)

Unnamed: 0,text
0,realcompounding themooncarl child supportcex’s...
1,econ101_author bitcoin__help adam3us blockstre...
2,"dogecoin is under hammer, its hard for doge to..."
3,$btcusd buys from this morning are running.. b...
4,cryptocharts child supportcex’s listing incomi...
5,ready to live life in the fast lane? invest in...
6,now is probably a good time to announce that o...
7,gecko_gangsta flokiceo - elon musk loves floki...
8,1₿ = $23620 3.01%details:change: 690.64$24h lo...
9,scam detectedscam crypto platform bitcoinex i...


# Afinn

In [8]:
from afinn import Afinn

afinn = Afinn()

def classify_tweet_afinn(tweet: str) -> str:
    """Get the sentiment of a tweet using the afinn library.

    Args:
        tweet (str): The tweet text.

    Returns:
        str: "Positive" if the sentiment is positive,
                "Negative" if the sentiment is negative.
    """
    score = afinn.score(tweet)
    return "Negative" if score < 0 else "Positive"

tweets_df['afinn'] = tweets_df.text.apply(classify_tweet_afinn) 
tweets_df.head(15)

Unnamed: 0,text,afinn
0,realcompounding themooncarl child supportcex’s...,Positive
1,econ101_author bitcoin__help adam3us blockstre...,Positive
2,"dogecoin is under hammer, its hard for doge to...",Negative
3,$btcusd buys from this morning are running.. b...,Positive
4,cryptocharts child supportcex’s listing incomi...,Positive
5,ready to live life in the fast lane? invest in...,Positive
6,now is probably a good time to announce that o...,Positive
7,gecko_gangsta flokiceo - elon musk loves floki...,Positive
8,1₿ = $23620 3.01%details:change: 690.64$24h lo...,Positive
9,scam detectedscam crypto platform bitcoinex i...,Negative


# TextBlob

## Polarity

In [9]:
from textblob import TextBlob

def classify_tweet_textblob(tweet: str) -> str:
    """Get the sentiment of a tweet using the TextBlob library.

    Args:
        tweet (str): The tweet text.

    Returns:
        str: "Positive" if the sentiment is positive,
                "Negative" if the sentiment is negative.
    """

    score = TextBlob(tweet).sentiment.polarity

    return "Negative" if score < 0 else "Positive"

tweets_df['textblob'] = tweets_df.text.apply(classify_tweet_afinn) 
tweets_df.head(15)

Unnamed: 0,text,afinn,textblob
0,realcompounding themooncarl child supportcex’s...,Positive,Positive
1,econ101_author bitcoin__help adam3us blockstre...,Positive,Positive
2,"dogecoin is under hammer, its hard for doge to...",Negative,Negative
3,$btcusd buys from this morning are running.. b...,Positive,Positive
4,cryptocharts child supportcex’s listing incomi...,Positive,Positive
5,ready to live life in the fast lane? invest in...,Positive,Positive
6,now is probably a good time to announce that o...,Positive,Positive
7,gecko_gangsta flokiceo - elon musk loves floki...,Positive,Positive
8,1₿ = $23620 3.01%details:change: 690.64$24h lo...,Positive,Positive
9,scam detectedscam crypto platform bitcoinex i...,Negative,Negative


## Subjectivity

In [10]:
def getSubjectivity(txt):
    return TextBlob(txt).sentiment.subjectivity
tweets_df['subjectivity'] = tweets_df.text.apply(getSubjectivity)
tweets_df

Unnamed: 0,text,afinn,textblob,subjectivity
0,realcompounding themooncarl child supportcex’s...,Positive,Positive,0.000000
1,econ101_author bitcoin__help adam3us blockstre...,Positive,Positive,0.748333
2,"dogecoin is under hammer, its hard for doge to...",Negative,Negative,0.520833
3,$btcusd buys from this morning are running.. b...,Positive,Positive,0.000000
4,cryptocharts child supportcex’s listing incomi...,Positive,Positive,0.000000
...,...,...,...,...
95,"if you missed bitcoin please do not, for an...",Negative,Negative,0.000000
96,"nft collection lunar moon plots price, stats, ...",Positive,Positive,0.000000
97,thoughts on $btc bitcoin! see cryptomate92's i...,Positive,Positive,0.000000
98,knapstacker tmr should be 24.500$ it’s getting...,Positive,Positive,0.535556
