# Stock Sentiment Analyzer


In [53]:
import requests
import pandas as pd 
import json
import google.generativeai as geai
import enum
from typing_extensions import TypedDict
import plotly.express as px
import os
from dotenv import load_dotenv


In [54]:
#Env 
load_dotenv()

# BlueSky API
BLUESKY_HANDLE = os.getenv('USERNAME')
BLUESKY_PASSWORD = os.getenv('PASSWORD')

# Google API
geai.configure(api_key=os.environ['GEMINY_APY'])
print(BLUESKY_HANDLE, BLUESKY_PASSWORD)


ivo196.bsky.social kkfk-diqs-y6aa-qd2h


In [55]:
# Gemini model 
model = geai.GenerativeModel(model_name="gemini-1.5-flash")

In [56]:
# Stock ticker
ticker = "AAPL"
# Numbers of the post to return 
n_posts = 20

In [57]:
# Bue Sky Web Scraper 
print(BLUESKY_HANDLE, BLUESKY_PASSWORD)
auth_response = requests.post("https://bsky.social/xrpc/com.atproto.server.createSession", json={"identifier": BLUESKY_HANDLE, "password": BLUESKY_PASSWORD})
auth_response.raise_for_status()
access_token = auth_response.json()["accessJwt"]
print(access_token)

ivo196.bsky.social kkfk-diqs-y6aa-qd2h
eyJ0eXAiOiJhdCtqd3QiLCJhbGciOiJFUzI1NksifQ.eyJzY29wZSI6ImNvbS5hdHByb3RvLmFwcFBhc3NQcml2aWxlZ2VkIiwic3ViIjoiZGlkOnBsYzp3bmZsbWN1aHNscmx2b2kzZ2l4Z2JuM3kiLCJpYXQiOjE3MzU5MjYwODgsImV4cCI6MTczNTkzMzI4OCwiYXVkIjoiZGlkOndlYjptb3R0bGVnaWxsLnVzLXdlc3QuaG9zdC5ic2t5Lm5ldHdvcmsifQ.bp_YqGmWaBuMGs4rZKYNunptrHrcCUvZDz_Uj9bQPBWXdwR7bp8HF7puK7irvwbTkLUsqcPygA8FIU81twGd0w


In [58]:
# Set up the headers for the API request
headers = {
    "Authorization": f"Bearer {access_token}"
}
# Define the search parameters
params = {
    "q" : ticker,
    "sort": "latest",
    "limit" : n_posts
}

search_response = requests.get("https://bsky.social/xrpc/app.bsky.feed.searchPosts",
                               headers=headers, 
                               params=params)
search_response.raise_for_status()
posts = search_response.json().get("posts",[])


In [59]:
# Extract data and create a list of dictionaries 

data = []
for post in posts:
    author = post.get("author", {}).get("handle", "Unknown")
    content = post.get("record", {}).get("text", "No content")
    created_at = post.get("record", {}).get("createdAt", "Unkown date")
    data.append({
        "Date": created_at,
        "Content": content,
        "Author": author,
    })
# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(data)
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df


Unnamed: 0,Date,Content,Author
0,2025-01-03 17:38:57.897000+00:00,Notable lagging performance by $aapl and $nflx...,hoon.bsky.social
1,2025-01-03 16:47:52.542000+00:00,💥 Institutions trading #options with high urge...,insiderfinance.com
2,2025-01-03 16:05:51.010000+00:00,China's a battlefield for Apple as Huawei gain...,kautious.bsky.social
3,2025-01-03 16:00:11.649000+00:00,$AAPL: Apple stock dips 3% as tech giants face...,financepulsehq.bsky.social
4,2025-01-03 15:43:43.395922+00:00,UBS Group AG shares rise 3.53% intraday after ...,fintwitter.bsky.social
5,2025-01-03 15:34:10.613260+00:00,Telephone And Data Systems shares rise 1.19% i...,fintwitter.bsky.social
6,NaT,Past five days: \n\n$AAPL -5.5%\n$NVDA ...,morningbrew.extwitter.link
7,2025-01-03 15:04:36.179000+00:00,$TSLA $AAPL $MSFT $DAL $BERY $ZIM $BABA $BAC $...,theoptionsbully2.bsky.social
8,2025-01-03 14:52:02.184071+00:00,$AAPL Apple's PE (33.0x) is almost two standar...,investmenttalk.bsky.social
9,NaT,Apple's stock price (AAPL) dipped into the new...,asktraders.bsky.social


In [60]:
# Gemini Sentiment Analysis
class Sentiment(enum.Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"
class AnylysisResult(TypedDict): 
    is_stock_related: bool
    sentiment: Sentiment

In [61]:
def analyze_post (content : str) -> AnylysisResult: 
    prompt = f'''
    Analyze following post and determine:
    1- whether it is related with the company, {ticker}, and related or discusses past, current or future stock performance of {ticker} explicitly.
    2- If related, classify the sentiment as positive, negative or neutral. 
    Post: {content}
    '''
    response = model.generate_content(
        prompt,
        generation_config=geai.GenerationConfig(
            response_mime_type="application/json",
            response_schema=AnylysisResult
        )
    )

    if response.candidates:
        candidate_content = response.candidates[0].content
        result_text = ''.join(part.text for part in candidate_content.parts)
        try:
            result = json.loads(result_text)
            is_stock_related = result.get("is_stock_related")
            sentiment = result.get("sentiment")
            if is_stock_related is not None and sentiment is not None:
                return is_stock_related, sentiment
            else:
                print("Missing expected keys in the response")
                return None,None
        except json.JSONDecodeError:
            print("Failed to decode JSON response")
            return None,None
    else:
        print("No candidates returned in the response")
        return None,None

In [62]:
# Apply the analysis to each post in the DataFrame
df[['is_stock_related', 'sentiment']] = df['Content'].apply(
    lambda x: pd.Series(analyze_post(x))
)

ResourceExhausted: 429 Resource has been exhausted (e.g. check quota).

In [11]:
df


Unnamed: 0,Date,Content,Author,is_stock_related,sentiment
0,2025-01-03 16:47:52.542000+00:00,💥 Institutions trading #options with high urge...,insiderfinance.com,True,neutral
1,2025-01-03 16:05:51.010000+00:00,China's a battlefield for Apple as Huawei gain...,kautious.bsky.social,True,negative
2,2025-01-03 16:00:11.649000+00:00,$AAPL: Apple stock dips 3% as tech giants face...,financepulsehq.bsky.social,True,negative
3,2025-01-03 15:43:43.395922+00:00,UBS Group AG shares rise 3.53% intraday after ...,fintwitter.bsky.social,True,neutral
4,2025-01-03 15:34:10.613260+00:00,Telephone And Data Systems shares rise 1.19% i...,fintwitter.bsky.social,True,positive
5,NaT,Past five days: \n\n$AAPL -5.5%\n$NVDA ...,morningbrew.extwitter.link,True,negative
6,2025-01-03 15:04:36.179000+00:00,$TSLA $AAPL $MSFT $DAL $BERY $ZIM $BABA $BAC $...,theoptionsbully2.bsky.social,True,positive
7,2025-01-03 14:52:02.184071+00:00,$AAPL Apple's PE (33.0x) is almost two standar...,investmenttalk.bsky.social,True,negative
8,NaT,Apple's stock price (AAPL) dipped into the new...,asktraders.bsky.social,True,negative
9,2025-01-03 14:37:20.069000+00:00,$AAPL to $SPX $SPY\n\nNot so fast MFer,uttammarketj.bsky.social,True,negative


In [None]:
# Filter out neutral sentiment
filtered_df = df[df['sentiment'] != 'neutral']

# Extract the date (day only) and calculate daily positive sentiment score
filtered_df['Day'] = filtered_df['Date'].dt.date
daily_sentiment = (
    filtered_df.groupby('Day')['sentiment']
    .apply(lambda x: (x == 'positive').sum() / len(x))
    .reset_index(name='positive_sentiment_score')
)

# Plot the daily sentiment score
fig = px.line(
    daily_sentiment,
    x='Day',
    y='positive_sentiment_score',
    title='Daily Positive Sentiment Score',
    labels={'positive_sentiment_score': 'Positive Sentiment Score', 'Day': 'Date'},
    markers=True,
)

fig.update_xaxes(dtick="D", tickformat="%Y-%m-%d")

fig

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Day'] = filtered_df['Date'].dt.date
