# Stock Sentiment Analyzer


In [1]:
import requests
import pandas as pd 
import json
import google.generativeai as geai
import enum
from typing_extensions import TypedDict
import plotly.express as px
import os
from dotenv import load_dotenv


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#Env 
load_dotenv()

# BlueSky API
BLUESKY_HANDLE = os.getenv('USERNAME')
BLUESKY_PASSWORD = os.getenv('PASSWORD')

# Google API
geai.configure(api_key=os.environ['GEMINY_APY'])
print(BLUESKY_HANDLE, BLUESKY_PASSWORD)


ivo196.bsky.social kkfk-diqs-y6aa-qd2h


In [3]:
# Gemini model 
model = geai.GenerativeModel(model_name="gemini-1.5-flash")

In [4]:
# Stock ticker
ticker = "BTC"
# Numbers of the post to return 
n_posts = 10

In [6]:
# Bue Sky Web Scraper 
print(BLUESKY_HANDLE, BLUESKY_PASSWORD)
auth_response = requests.post("https://bsky.social/xrpc/com.atproto.server.createSession", json={"identifier": BLUESKY_HANDLE, "password": BLUESKY_PASSWORD})
auth_response.raise_for_status()
access_token = auth_response.json()["accessJwt"]
print(access_token)

ivo196.bsky.social kkfk-diqs-y6aa-qd2h
eyJ0eXAiOiJhdCtqd3QiLCJhbGciOiJFUzI1NksifQ.eyJzY29wZSI6ImNvbS5hdHByb3RvLmFwcFBhc3NQcml2aWxlZ2VkIiwic3ViIjoiZGlkOnBsYzp3bmZsbWN1aHNscmx2b2kzZ2l4Z2JuM3kiLCJpYXQiOjE3MzU5ODU5NzgsImV4cCI6MTczNTk5MzE3OCwiYXVkIjoiZGlkOndlYjptb3R0bGVnaWxsLnVzLXdlc3QuaG9zdC5ic2t5Lm5ldHdvcmsifQ.qYsuUWL8gIWUDKOtdkq4tspOPzgHJfk0drjfT7ZWyxZFmPmP6wfpVzEVb6KDn1VDl30otFfD3dwkeWY6l4dkbA


In [8]:
# Set up the headers for the API request
headers = {
    "Authorization": f"Bearer {access_token}"
}
# Define the search parameters
params = {
    "q" : ticker,
    "sort": "latest",
    "limit" : n_posts
}

search_response = requests.get("https://bsky.social/xrpc/app.bsky.feed.searchPosts",
                               headers=headers, 
                               params=params)
search_response.raise_for_status()
posts = search_response.json().get("posts",[])


In [13]:
# Extract data and create a list of dictionaries 

data = []
for post in posts:
    author = post.get("author", {}).get("handle", "Unknown")
    content = post.get("record", {}).get("text", "No content")
    created_at = post.get("record", {}).get("createdAt", "Unkown date")
    data.append({
        "Date": created_at,
        "Content": content,
        "Author": author,
    })
# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(data)
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df


Unnamed: 0,Date,Content,Author
0,2025-01-04 10:32:09+00:00,Древний биткойн-кит снова в деле: продажа посл...,kriptabiz.bsky.social
1,2025-01-04 10:32:05+00:00,Ballena Bitcoin del Pasado Realiza Otra Venta ...,eldinerolat.bsky.social
2,NaT,"BTC-USD: 97700.805000 (in 24h +1165.060000, +1...",cryptobot.yaizawa.jp
3,NaT,China remains the second-largest holder of Bit...,newshoundai.bsky.social
4,2025-01-04 10:27:04+00:00,"比特币价格分析：BTC跌破61,000美元，下一目标在哪里？\r\n\r\nhttps://...",qiancx.bsky.social
5,NaT,🚨Cyberattack Alert ‼️\n\n🇨🇦Canada - Montréal N...,hackmanac.com
6,2025-01-04 10:26:25+00:00,传奇交易员彼得·布兰特分享关键比特币（BTC）图表！\r\n\r\nhttps://qian...,qiancx.bsky.social
7,NaT,Another prediction made by Geraci is the intro...,newshoundai.bsky.social
8,2025-01-04 10:24:11+00:00,比特币创业实验室推出BRC-20 LABB代币，助力BTC创业生态发展\r\n\r\nhtt...,qiancx.bsky.social
9,NaT,"Bitcoin is on the move! 📊 Current price: $101,...",samathasheilafx.bsky.social


In [14]:
# Gemini Sentiment Analysis
class Sentiment(enum.Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"
class AnylysisResult(TypedDict): 
    is_stock_related: bool
    sentiment: Sentiment

In [15]:
def analyze_post (content : str) -> AnylysisResult: 
    prompt = f'''
    Analyze following post and determine:
    1- whether it is related with the company, {ticker}, and related or discusses past, current or future stock performance of {ticker} explicitly.
    2- If related, classify the sentiment as positive, negative or neutral. 
    Post: {content}
    '''
    response = model.generate_content(
        prompt,
        generation_config=geai.GenerationConfig(
            response_mime_type="application/json",
            response_schema=AnylysisResult
        )
    )

    if response.candidates:
        candidate_content = response.candidates[0].content
        result_text = ''.join(part.text for part in candidate_content.parts)
        try:
            result = json.loads(result_text)
            is_stock_related = result.get("is_stock_related")
            sentiment = result.get("sentiment")
            if is_stock_related is not None and sentiment is not None:
                return is_stock_related, sentiment
            else:
                print("Missing expected keys in the response")
                return None,None
        except json.JSONDecodeError:
            print("Failed to decode JSON response")
            return None,None
    else:
        print("No candidates returned in the response")
        return None,None

In [12]:
# Apply the analysis to each post in the DataFrame
df[['is_stock_related', 'sentiment']] = df['Content'].apply(
    lambda x: pd.Series(analyze_post(x))
)

In [16]:
df


Unnamed: 0,Date,Content,Author
0,2025-01-04 10:32:09+00:00,Древний биткойн-кит снова в деле: продажа посл...,kriptabiz.bsky.social
1,2025-01-04 10:32:05+00:00,Ballena Bitcoin del Pasado Realiza Otra Venta ...,eldinerolat.bsky.social
2,NaT,"BTC-USD: 97700.805000 (in 24h +1165.060000, +1...",cryptobot.yaizawa.jp
3,NaT,China remains the second-largest holder of Bit...,newshoundai.bsky.social
4,2025-01-04 10:27:04+00:00,"比特币价格分析：BTC跌破61,000美元，下一目标在哪里？\r\n\r\nhttps://...",qiancx.bsky.social
5,NaT,🚨Cyberattack Alert ‼️\n\n🇨🇦Canada - Montréal N...,hackmanac.com
6,2025-01-04 10:26:25+00:00,传奇交易员彼得·布兰特分享关键比特币（BTC）图表！\r\n\r\nhttps://qian...,qiancx.bsky.social
7,NaT,Another prediction made by Geraci is the intro...,newshoundai.bsky.social
8,2025-01-04 10:24:11+00:00,比特币创业实验室推出BRC-20 LABB代币，助力BTC创业生态发展\r\n\r\nhtt...,qiancx.bsky.social
9,NaT,"Bitcoin is on the move! 📊 Current price: $101,...",samathasheilafx.bsky.social


In [17]:
# Filter out neutral sentiment
filtered_df = df[df['sentiment'] != 'neutral']

# Extract the date (day only) and calculate daily positive sentiment score
filtered_df['Day'] = filtered_df['Date'].dt.date
daily_sentiment = (
    filtered_df.groupby('Day')['sentiment']
    .apply(lambda x: (x == 'positive').sum() / len(x))
    .reset_index(name='positive_sentiment_score')
)

# Plot the daily sentiment score
fig = px.line(
    daily_sentiment,
    x='Day',
    y='positive_sentiment_score',
    title='Daily Positive Sentiment Score',
    labels={'positive_sentiment_score': 'Positive Sentiment Score', 'Day': 'Date'},
    markers=True,
)

fig.update_xaxes(dtick="D", tickformat="%Y-%m-%d")

fig

KeyError: 'sentiment'