# Stock Sentiment Analyzer


In [1]:
import requests
import pandas as pd 
import json
import google.generativeai as geai
import enum
from typing_extensions import TypedDict
import plotly.express as px
import os
from dotenv import load_dotenv


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#Env 
load_dotenv()

# BlueSky API
BLUESKY_HANDLE = os.getenv('USERNAME')
BLUESKY_PASSWORD = os.getenv('PASSWORD')

# Google API
geai.configure(api_key=os.environ['GEMINY_APY'])
print(BLUESKY_HANDLE, BLUESKY_PASSWORD)


ivo196.bsky.social kkfk-diqs-y6aa-qd2h


In [3]:
# Gemini model 
model = geai.GenerativeModel(model_name="gemini-1.5-flash")

In [4]:
# Stock ticker
ticker = "BTC"
# Numbers of the post to return 
n_posts = 10

In [5]:
# Bue Sky Web Scraper 
print(BLUESKY_HANDLE, BLUESKY_PASSWORD)
auth_response = requests.post("https://bsky.social/xrpc/com.atproto.server.createSession", json={"identifier": BLUESKY_HANDLE, "password": BLUESKY_PASSWORD})
auth_response.raise_for_status()
access_token = auth_response.json()["accessJwt"]
print(access_token)

ivo196.bsky.social kkfk-diqs-y6aa-qd2h
eyJ0eXAiOiJhdCtqd3QiLCJhbGciOiJFUzI1NksifQ.eyJzY29wZSI6ImNvbS5hdHByb3RvLmFwcFBhc3NQcml2aWxlZ2VkIiwic3ViIjoiZGlkOnBsYzp3bmZsbWN1aHNscmx2b2kzZ2l4Z2JuM3kiLCJpYXQiOjE3MzYwMTEwMjIsImV4cCI6MTczNjAxODIyMiwiYXVkIjoiZGlkOndlYjptb3R0bGVnaWxsLnVzLXdlc3QuaG9zdC5ic2t5Lm5ldHdvcmsifQ.yEfIKSWI0Jwz7WS649W-GBflqlbLsSa5ntaFJ9t96o3yMONuSCMj7bwRX5DXdHA4X9gVk2hn8VMxOVL2_i6Xuw


In [6]:
# Set up the headers for the API request
headers = {
    "Authorization": f"Bearer {access_token}"
}
# Define the search parameters
params = {
    "q" : ticker,
    "sort": "latest",
    "limit" : n_posts
}

search_response = requests.get("https://bsky.social/xrpc/app.bsky.feed.searchPosts",
                               headers=headers, 
                               params=params)
search_response.raise_for_status()
posts = search_response.json().get("posts",[])


In [7]:
# Extract data and create a list of dictionaries 

data = []
for post in posts:
    author = post.get("author", {}).get("handle", "Unknown")
    content = post.get("record", {}).get("text", "No content")
    created_at = post.get("record", {}).get("createdAt", "Unkown date")
    data.append({
        "Date": created_at,
        "Content": content,
        "Author": author,
    })
# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(data)
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df


Unnamed: 0,Date,Content,Author
0,2025-01-04 17:16:50+00:00,比特币今日动态：BTC涨幅达3.78%！\r\n\r\nhttps://qian.cx/po...,qiancx.bsky.social
1,2025-01-04 17:16:46+00:00,"比特币价格分析：BTC逼近65,000美元；贝莱德的人工智能策略与Saylor的1620%收...",qiancx.bsky.social
2,2025-01-04 17:16:33+00:00,狗狗币（SHIB）模式失效，比特币（BTC）稳住关键价位，以太坊（ETH）在2400美元面临...,qiancx.bsky.social
3,NaT,"💱 Latest Bitcoin price: USD$ 97,758.00 \n📊 Cha...",btcinfo.bsky.social
4,2025-01-04 17:15:49+00:00,比特币价格分析：BTC飙升至$65K，下一个目标是什么？\r\n\r\nhttps://qi...,qiancx.bsky.social
5,NaT,\nBluesky's Top 10 Trending Words from the Pas...,nowbreezing.ntw.app
6,NaT,Bitcoin Boom Goes Mainstream—Blackrock’s IBIT ...,crypto.at.thenote.app
7,NaT,GM #Bsky #Crypto friends!\n\nI hope you took a...,cryptoclear.bsky.social
8,2025-01-04 17:05:44+00:00,比特币家族如何守护他们的BTC、ETH和LTC财富\r\n\r\nhttps://qian....,qiancx.bsky.social
9,NaT,$BTC Bitcoin Buyers Saw 40% Gain on Average La...,ka3nacheng.bsky.social


In [8]:
# Gemini Sentiment Analysis
class Sentiment(enum.Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"
class AnylysisResult(TypedDict): 
    is_stock_related: bool
    sentiment: Sentiment

In [9]:
def analyze_post (content : str) -> AnylysisResult: 
    prompt = f'''
    Analyze following post and determine:
    1- whether it is related with the company, {ticker}, and related or discusses past, current or future stock performance of {ticker} explicitly.
    2- If related, classify the sentiment as positive, negative or neutral. 
    Post: {content}
    '''
    response = model.generate_content(
        prompt,
        generation_config=geai.GenerationConfig(
            response_mime_type="application/json",
            response_schema=AnylysisResult
        )
    )

    if response.candidates:
        candidate_content = response.candidates[0].content
        result_text = ''.join(part.text for part in candidate_content.parts)
        try:
            result = json.loads(result_text)
            is_stock_related = result.get("is_stock_related")
            sentiment = result.get("sentiment")
            if is_stock_related is not None and sentiment is not None:
                return is_stock_related, sentiment
            else:
                print("Missing expected keys in the response")
                return None,None
        except json.JSONDecodeError:
            print("Failed to decode JSON response")
            return None,None
    else:
        print("No candidates returned in the response")
        return None,None

In [10]:
# Apply the analysis to each post in the DataFrame
df[['is_stock_related', 'sentiment']] = df['Content'].apply(
    lambda x: pd.Series(analyze_post(x))
)

Missing expected keys in the response


In [11]:
df


Unnamed: 0,Date,Content,Author,is_stock_related,sentiment
0,2025-01-04 17:16:50+00:00,比特币今日动态：BTC涨幅达3.78%！\r\n\r\nhttps://qian.cx/po...,qiancx.bsky.social,True,positive
1,2025-01-04 17:16:46+00:00,"比特币价格分析：BTC逼近65,000美元；贝莱德的人工智能策略与Saylor的1620%收...",qiancx.bsky.social,True,positive
2,2025-01-04 17:16:33+00:00,狗狗币（SHIB）模式失效，比特币（BTC）稳住关键价位，以太坊（ETH）在2400美元面临...,qiancx.bsky.social,True,positive
3,NaT,"💱 Latest Bitcoin price: USD$ 97,758.00 \n📊 Cha...",btcinfo.bsky.social,True,negative
4,2025-01-04 17:15:49+00:00,比特币价格分析：BTC飙升至$65K，下一个目标是什么？\r\n\r\nhttps://qi...,qiancx.bsky.social,True,positive
5,NaT,\nBluesky's Top 10 Trending Words from the Pas...,nowbreezing.ntw.app,,
6,NaT,Bitcoin Boom Goes Mainstream—Blackrock’s IBIT ...,crypto.at.thenote.app,True,positive
7,NaT,GM #Bsky #Crypto friends!\n\nI hope you took a...,cryptoclear.bsky.social,True,positive
8,2025-01-04 17:05:44+00:00,比特币家族如何守护他们的BTC、ETH和LTC财富\r\n\r\nhttps://qian....,qiancx.bsky.social,True,neutral
9,NaT,$BTC Bitcoin Buyers Saw 40% Gain on Average La...,ka3nacheng.bsky.social,True,positive


In [12]:
# Filter out neutral sentiment
filtered_df = df[df['sentiment'] != 'neutral']

# Extract the date (day only) and calculate daily positive sentiment score
filtered_df['Day'] = filtered_df['Date'].dt.date
daily_sentiment = (
    filtered_df.groupby('Day')['sentiment']
    .apply(lambda x: (x == 'positive').sum() / len(x))
    .reset_index(name='positive_sentiment_score')
)

# Plot the daily sentiment score
fig = px.line(
    daily_sentiment,
    x='Day',
    y='positive_sentiment_score',
    title='Daily Positive Sentiment Score',
    labels={'positive_sentiment_score': 'Positive Sentiment Score', 'Day': 'Date'},
    markers=True,
)

fig.update_xaxes(dtick="D", tickformat="%Y-%m-%d")

fig

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Day'] = filtered_df['Date'].dt.date
