<a href="https://colab.research.google.com/github/Gredmond/JupyterBooks/blob/main/CompanySetiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import pandas as pd
import google.generativeai as genai
import enum
from typing_extensions import TypedDict
import json
from google.colab import userdata

In [2]:
# Sentiment labels a post can be classified with. Each member's value is the
# lowercase label string. Used as the type of AnalysisResult.sentiment.
class Sentiment(enum.Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"

# Shape of the structured answer requested from the model for a single post.
# analyze_post passes this class as `response_schema` and reads both keys back
# from the decoded JSON reply.
class AnalysisResult(TypedDict):
    is_stock_related: bool  # whether the post is about the company's stock
    sentiment: Sentiment  # sentiment label assigned to the post

In [3]:
# Company to analyse: used as the Bluesky search query, interpolated into the
# analysis prompt, and written to the 'Company' column of the result table.
SEARCH_TERM = 'Rivian'
# Credentials are read through google.colab.userdata so that no secret is
# hardcoded in the notebook.
BLUESKY_HANDLE = userdata.get("BLUESKY_USER")
BLUESKY_PASSWORD = userdata.get("BLUESKY_CREDS")
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")

# Configure the Gemini SDK once and create the model object used by analyze_post.
genai.configure(api_key=GOOGLE_API_KEY)
# NOTE(review): "gemini-2.0-flash-exp" looks like an alternative model id kept for reference — confirm.
model = genai.GenerativeModel("models/gemini-2.0-flash") # gemini-2.0-flash-exp

In [4]:


n = 100  # Number of latest posts to retrieve

# Seconds to wait on each Bluesky request. requests applies no timeout by
# default, so without this a stalled connection would block the cell forever.
REQUEST_TIMEOUT = 30

# Authenticate and obtain access token
auth_response = requests.post(
    'https://bsky.social/xrpc/com.atproto.server.createSession',
    json={'identifier': BLUESKY_HANDLE, 'password': BLUESKY_PASSWORD},
    timeout=REQUEST_TIMEOUT,
)
auth_response.raise_for_status()
access_token = auth_response.json().get('accessJwt')
if not access_token:
    # Fail here with a clear message instead of sending "Bearer None" below
    # and getting an opaque authorization error from the search endpoint.
    raise RuntimeError('Bluesky createSession response did not contain an accessJwt')

# Set up the request headers with the access token
headers = {'Authorization': f'Bearer {access_token}'}

# Define the search parameters
params = {
    'q': SEARCH_TERM,
    'sort': 'latest',
    'limit': n
}

# Perform the search request
search_response = requests.get(
    'https://bsky.social/xrpc/app.bsky.feed.searchPosts',
    headers=headers,
    params=params,
    timeout=REQUEST_TIMEOUT,
)
search_response.raise_for_status()
posts = search_response.json().get('posts', [])

# Flatten each post into a {'Date', 'Content'} record. Missing fields fall back
# to placeholder strings so every record has both keys.
data = []
for post in posts:
    record = post.get('record', {})
    content = record.get('text', 'No content')
    created_at = record.get('createdAt', 'Unknown date')
    data.append({'Date': created_at, 'Content': content})



In [5]:
# Convert list of dictionaries to DataFrame. Naming the columns explicitly keeps
# 'Date' and 'Content' present even when the search returned no posts, so the
# cells below do not fail with KeyError on an empty result.
df = pd.DataFrame(data, columns=['Date', 'Content'])

# Parse 'Date' into timezone-aware UTC timestamps.
# - errors='coerce' turns unparseable values (e.g. the 'Unknown date'
#   placeholder) into NaT.
# - format='ISO8601' (pandas >= 2.0) accepts ISO 8601 strings that differ in
#   layout, e.g. with and without fractional seconds; otherwise the layout of
#   the first row is enforced and the others are coerced to NaT.
# - utc=True normalises differing UTC offsets to one datetime64 dtype, which the
#   later `.dt` access requires.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce', utc=True, format='ISO8601')

# Display the DataFrame
df.head(2)




Unnamed: 0,Date,Content
0,2025-02-28 17:48:23.380000+00:00,"""Your Outie drives a Rivian with steer horns o..."
1,2025-02-28 17:47:50.336000+00:00,"F Tesla, Rivian is where it’s at! 👏🏼💪🏼"


In [6]:


def analyze_post(content: str) -> "tuple[bool | None, str | None]":
    """Classify one post with the Gemini model.

    Builds a prompt from the module-level ``SEARCH_TERM`` and the post text,
    requests a JSON reply constrained by ``AnalysisResult``, and decodes it.

    Args:
        content: Text of the post to analyse.

    Returns:
        ``(is_stock_related, sentiment)`` as decoded from the model's JSON
        reply. ``sentiment`` is the decoded JSON value, not a ``Sentiment``
        member. Returns ``(None, None)`` when the response has no candidates,
        is not valid JSON, is not a JSON object, or lacks either key.

    Exceptions raised by ``model.generate_content`` itself are not caught and
    propagate to the caller.
    """
    prompt = f"""
    company is stock ticker {SEARCH_TERM}:
    Analyze the following post and determine:
    1. Whether it is related to the company stock, or relates to or discusses past, current, or future stock performance of company explicitly
    or it relates to company's products in that case it is not stock related but should still be analyzed.
    2. Classify the sentiment as positive, negative, or neutral.

    Post: "{content}"
    """
    response = model.generate_content(
        prompt,
        generation_config=genai.GenerationConfig(
            response_mime_type="application/json",
            response_schema=AnalysisResult
        )
    )
    if not response.candidates:
        print("No candidates returned")
        return None, None

    # Only the first candidate is used; its text parts are joined before decoding.
    candidate_content = response.candidates[0].content
    result_text = ''.join(part.text for part in candidate_content.parts)
    try:
        result = json.loads(result_text)
    except json.JSONDecodeError:
        print("Failed to decode JSON response")
        return None, None

    # Valid JSON that is not an object (e.g. a list or string) has no keys to
    # read; treat it like a reply with missing keys instead of raising.
    if not isinstance(result, dict):
        return None, None

    is_stock_related = result.get('is_stock_related')
    sentiment = result.get('sentiment')
    if is_stock_related is None or sentiment is None:
        return None, None
    return is_stock_related, sentiment
# Apply the analysis to each post. The (is_stock_related, sentiment) tuples are
# collected first and turned into a two-column frame aligned on df's index.
# This also works when df has no rows, where Series.apply would return an empty
# Series that cannot be assigned to two columns.
analysis_rows = [analyze_post(text) for text in df['Content']]
df[['is_stock_related', 'sentiment']] = pd.DataFrame(
    analysis_rows,
    columns=['is_stock_related', 'sentiment'],
    index=df.index,
)

In [7]:

# Spot-check a random handful of classified posts. The sample size is capped at
# the number of rows because DataFrame.sample raises ValueError when asked for
# more rows than exist (it samples without replacement by default).
df.sample(min(12, len(df)))

Unnamed: 0,Date,Content,is_stock_related,sentiment
77,2025-02-27 22:24:01.053000+00:00,"I love my #Rivian eSUV. It's powerful, fun to ...",False,positive
42,2025-02-28 12:51:13.865000+00:00,They probably feel like trash because they are...,False,positive
94,2025-02-27 18:39:15.813000+00:00,I put a 2025 Rivian R1T through its paces a fe...,False,positive
74,2025-02-27 22:44:17.466000+00:00,"It’s funny, I joined several Rivian EV forums,...",False,negative
48,2025-02-28 07:19:24.446000+00:00,“Electric trucks aren’t capable of real work l...,False,negative
21,2025-02-28 15:46:55.947000+00:00,I haven't heard good things about Rivian or Volvo,False,negative
58,2025-02-28 03:56:18.148000+00:00,Rivian Moves Closer to Profitability but Faces...,True,neutral
67,2025-02-28 00:52:39.427000+00:00,"Buy… a rivian, I guess? 🤷‍♂️",False,neutral
6,2025-02-28 17:01:08.860000+00:00,Mountain Pass Performance has a pre-sale up fo...,False,neutral
86,2025-02-27 21:33:48.665000+00:00,Rivian’s Palo Alto office is a few blocks from...,False,neutral


In [8]:
# Remove the raw post text; only the date and the classification are needed
# from here on. errors='ignore' keeps the cell re-runnable: on a second run
# 'Content' is already gone and a plain drop would raise KeyError.
df = df.drop(columns=['Content'], errors='ignore')

# Display the updated DataFrame (sample size capped at the number of rows so
# that a small result set does not raise ValueError)
df.sample(min(12, len(df)))




Unnamed: 0,Date,is_stock_related,sentiment
70,2025-02-27 23:10:54.779000+00:00,False,positive
78,2025-02-27 22:22:19.956000+00:00,True,neutral
41,2025-02-28 13:42:20.616000+00:00,True,neutral
13,2025-02-28 16:32:07.519000+00:00,False,positive
19,2025-02-28 15:55:47.673000+00:00,False,positive
92,2025-02-27 18:44:23.368000+00:00,False,positive
48,2025-02-28 07:19:24.446000+00:00,False,negative
2,2025-02-28 17:19:47.760000+00:00,False,neutral
52,2025-02-28 04:32:41.157000+00:00,False,neutral
98,2025-02-27 18:20:00.505000+00:00,False,neutral


In [9]:
# For each calendar date, count how many posts received each sentiment label.

# Reduce 'Date' to the (UTC) calendar date, dropping the time component.
# Going through pd.to_datetime keeps this cell re-runnable: after one run the
# column holds plain date objects, on which the .dt accessor is not available.
df['Date'] = pd.to_datetime(df['Date'], utc=True).dt.date

# Cross-tabulate dates against sentiment labels: one row per date, one column
# per label, cell = number of posts, 0 where a combination does not occur.
# Rows with a missing date or sentiment are not counted.
pivot_df = pd.crosstab(df['Date'], df['sentiment'])

# Display the pivoted DataFrame
pivot_df


sentiment,negative,neutral,positive
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-02-27,7,14,10
2025-02-28,11,29,29


In [10]:
# Tag every row with the analysed company, placing the column right after
# 'Date'. DataFrame.insert raises ValueError when the column already exists,
# so the insert is skipped if this cell is run a second time.
if 'Company' not in df.columns:
    df.insert(df.columns.get_loc('Date') + 1, 'Company', SEARCH_TERM)


In [11]:
# Show the final per-post table (date, company, stock-related flag, sentiment).
df

Unnamed: 0,Date,Company,is_stock_related,sentiment
0,2025-02-28,Rivian,False,neutral
1,2025-02-28,Rivian,False,positive
2,2025-02-28,Rivian,False,neutral
3,2025-02-28,Rivian,False,neutral
4,2025-02-28,Rivian,False,positive
...,...,...,...,...
95,2025-02-27,Rivian,True,positive
96,2025-02-27,Rivian,True,negative
97,2025-02-27,Rivian,False,neutral
98,2025-02-27,Rivian,False,neutral
