In [70]:
from tweety.bot import Twitter
import re
from typing import List, Dict
from tweety.types import Tweet
import pandas as pd
from datetime import datetime
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
import json

### Extracting tweets

In [71]:
twitter_client = Twitter()
twitter_handle = 'elonmusk'
tweets = twitter_client.get_tweets(twitter_handle)
print(f"{tweets = }")

tweets = UserTweets(user_id=44196397, count=99)


In [74]:
tweets[0].author.name

'Elon Musk'

In [41]:
def clean_tweet(text: str) -> str:
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"www.\S+", "", text)
    return re.sub(r"\s+", " ", text)
    
def create_dataframe_from_tweets(tweets: List[Tweet]) -> pd.DataFrame:
    rows = []
    for tweet in tweets:
        clean_text = clean_tweet(tweet.text)
        if len(clean_text) == 0:
            continue
        rows.append(
            {
                "id": tweet.id,
                "text": clean_text,
                "author": tweet.author.username,
                "date": str(tweet.date.date()),
                "created_at": tweet.date,
                "views": tweet.views,
            }
        )
    df = pd.DataFrame(
        rows, columns=["id", "text", "author", "date", "views", "created_at"]
    )
    df.set_index("id", inplace=True)
    if df.empty:
        print('empty df')
        return df
    # df = df[df.created_at.dt.date > datetime.now().date() - pd.to_timedelta("7day")]
    # print(df)
    
    return df.sort_values(by="created_at", ascending=False)

In [47]:
df = create_dataframe_from_tweets(tweets)

In [52]:
datetime.now().date()

datetime.date(2024, 9, 1)

In [54]:
datetime.now().date() - pd.to_timedelta("30day")

datetime.date(2024, 8, 2)

In [51]:
df[df.created_at.dt.date > datetime.now().date()- pd.to_timedelta("30day")]

Unnamed: 0_level_0,text,author,date,views,created_at
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1825723913051000851,I am willing to serve,elonmusk,2024-08-20,81392833,2024-08-20 02:37:55+00:00
1823742501884453312,Haters will say this is AI 🕺🕺,elonmusk,2024-08-14,141316557,2024-08-14 15:24:30+00:00
1822290335059353965,Found this pic of the UK justice system,elonmusk,2024-08-10,101912839,2024-08-10 15:14:07+00:00
1820221050904359331,😬,elonmusk,2024-08-04,136679661,2024-08-04 22:11:31+00:00


### Building Sentiment analyzer

In [58]:
PROMPT_TEMPLATE = """
You're a cryptocurrency trader with 10+ years of experience. You always follow the trend
and follow and deeply understand crypto experts on Twitter. You always consider the historical predictions for each expert on Twitter.

You're given tweets and their view count from @{twitter_handle} for specific dates:

{tweets}

Tell how bullish or bearish the tweets for each date are. Use numbers between 0 and 100, where 0 is extremely bearish and 100 is extremely bullish.
Use a JSON using the format:

date: sentiment

Each record of the JSON should give the aggregate sentiment for that date. Return just the JSON. Do not explain.
"""

In [None]:
def create_tweet_list_for_prompt(tweets: List[Tweet], twitter_handle: str) -> str:
    df = create_dataframe_from_tweets(tweets)
    user_tweets = df[df.author == twitter_handle]
    if user_tweets.empty:
        return ""
    if len(user_tweets) > 100:
        user_tweets = user_tweets.sample(n=100)

    text = ""

    for tweets_date, tweets in user_tweets.groupby("date"):
        text += f"{tweets_date}:"
        for tweet in tweets.itertuples():
            text += f"\n{tweet.views} - {tweet.text}"
    return text

In [67]:
twitter_handle = 'elonmusk'
user_tweets = df[df.author == twitter_handle]
text = ''
for tweets_date, tweets in user_tweets.groupby("date"):
    text += f"{tweets_date}:"
    for tweet in tweets.itertuples():
        text += f"\n{tweet.views} - {tweet.text}"
    print(text)
    # break

2018-10-19:
Unavailable - Had to been done ur welcome 
2018-10-19:
Unavailable - Had to been done ur welcome 2020-03-06:
Unavailable - The coronavirus panic is dumb
2018-10-19:
Unavailable - Had to been done ur welcome 2020-03-06:
Unavailable - The coronavirus panic is dumb2021-05-02:
Unavailable - I love Art Deco
2018-10-19:
Unavailable - Had to been done ur welcome 2020-03-06:
Unavailable - The coronavirus panic is dumb2021-05-02:
Unavailable - I love Art Deco2022-01-06:
Unavailable - Starlinks with “lasers” deployed to orbit 
2018-10-19:
Unavailable - Had to been done ur welcome 2020-03-06:
Unavailable - The coronavirus panic is dumb2021-05-02:
Unavailable - I love Art Deco2022-01-06:
Unavailable - Starlinks with “lasers” deployed to orbit 2022-04-14:
Unavailable - i♥️u
2018-10-19:
Unavailable - Had to been done ur welcome 2020-03-06:
Unavailable - The coronavirus panic is dumb2021-05-02:
Unavailable - I love Art Deco2022-01-06:
Unavailable - Starlinks with “lasers” deployed to orbi

In [61]:
user_tweets = user_tweets.groupby("date")
user_tweets

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x10e0dfe50>

In [None]:
def analyze_sentiment(twitter_handle: str, tweets: List[Tweet]) -> Dict[str, int]:
    chat_gpt = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
    prompt = PromptTemplate(
        input_variables=["twitter_handle", "tweets"], template=PROMPT_TEMPLATE
    )

    sentiment_chain = LLMChain(llm=chat_gpt, prompt=prompt)
    response = sentiment_chain(
        {
            "twitter_handle": twitter_handle,
            "tweets": create_tweet_list_for_prompt(tweets, twitter_handle),
        }
    )
    return json.loads(response["text"])