In [1]:
# VesterAI - Notebook 06: LLM-Powered Insight Generator

"""
Objective:
Use an LLM (e.g., LLaMA 2, GPT-style) to generate:
1. Daily summaries of sentiment and market data
2. Answer natural language questions about the dataset
3. Prepare for chatbot integration

Input: Sentiment + Stock data
Model: HuggingFace pipeline or LLaMA 2 (your setup)
"""

'\nObjective:\nUse an LLM (e.g., LLaMA 2, GPT-style) to generate:\n1. Daily summaries of sentiment and market data\n2. Answer natural language questions about the dataset\n3. Prepare for chatbot integration\n\nInput: Sentiment + Stock data\nModel: HuggingFace pipeline or LLaMA 2 (your setup)\n'

In [2]:
# Install transformers if needed
!pip install transformers --quiet

In [3]:
import pandas as pd
from datetime import datetime, timedelta

# Load latest model dataset
data = pd.read_csv("../data/processed/AAPL_model_data.csv", parse_dates=["Date"])
data = data.sort_values("Date").reset_index(drop=True)

# Choose a date (or use latest)
selected_date = data["Date"].iloc[-1]  # most recent available date
context_df = data[data["Date"] == selected_date]

if context_df.empty:
    selected_date = data["Date"].iloc[-2]
    context_df = data[data["Date"] == selected_date]

# Build input text for the model
row = context_df.iloc[0]
summary_context = f"""
Date: {row['Date'].strftime('%Y-%m-%d')}
Stock Close: {row['Close']:.2f}
Return: {row['return']:.4f}
Twitter Sentiment Score: {row['twitter_sentiment']:.2f}
News Sentiment Score: {row['news_sentiment']:.2f}
Reddit Sentiment Score: {row.get('reddit_sentiment', 0):.2f}
Technical Indicators:
- RSI: {row['rsi_14']:.2f}
- MACD: {row['macd']:.4f}
- OBV: {row['obv']:.2f}
"""
print(summary_context)


Date: 2025-03-24
Stock Close: 220.73
Return: 0.0113
Twitter Sentiment Score: 0.00
News Sentiment Score: 0.00
Reddit Sentiment Score: 0.00
Technical Indicators:
- RSI: 43.32
- MACD: -5.8843
- OBV: 3055157200.00



In [4]:
from transformers import pipeline

# Load a small LLM for generation
generator = pipeline("text-generation", model="gpt2", max_length=250)

# You can also plug in your own LLaMA 2 setup here

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [5]:
prompt = (
    "Based on the following market data, generate a summary insight about the day for investors:\n"
    + summary_context
)

response = generator(prompt, max_length=150, num_return_sequences=1)
print("Insight Generated:\n")
print(response[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Insight Generated:

Based on the following market data, generate a summary insight about the day for investors:

Date: 2025-03-24
Stock Close: 220.73
Return: 0.0113
Twitter Sentiment Score: 0.00
News Sentiment Score: 0.00
Reddit Sentiment Score: 0.00
Technical Indicators:
- RSI: 43.32
- MACD: -5.8843
- OBV: 3055157200.00
- OOC: 23.86714
Note that the top 10 stocks are based on recent events (not any recent events at all). As the market moves in the next ten days, we believe it is likely that stock prices and


In [6]:
# You can also let the user ask a question about the day
question = "Was the sentiment bullish or bearish on this day?"

qa_prompt = (
    f"Context:\n{summary_context}\n\n"
    f"Q: {question}\n"
    f"A:"
)

response = generator(qa_prompt, max_length=100, num_return_sequences=1)
print("Answer:\n")
print(response[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Input length of input_ids is 104, but `max_length` is set to 100. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.


Answer:

Context:

Date: 2025-03-24
Stock Close: 220.73
Return: 0.0113
Twitter Sentiment Score: 0.00
News Sentiment Score: 0.00
Reddit Sentiment Score: 0.00
Technical Indicators:
- RSI: 43.32
- MACD: -5.8843
- OBV: 3055157200.00


Q: Was the sentiment bullish or bearish on this day?
A: Bear


In [7]:
def generate_insight_summary(row):
    context = f"""
Date: {row['Date']}
Stock Close: {row['Close']:.2f}
Return: {row['return']:.4f}
Twitter Sentiment: {row['twitter_sentiment']:.2f}
News Sentiment: {row['news_sentiment']:.2f}
RSI: {row['rsi_14']:.2f}
MACD: {row['macd']:.4f}
"""
    prompt = "Summarize the market insight for this day:\n" + context
    return generator(prompt, max_length=100)[0]["generated_text"]

In [8]:
print("LLM Insight Generator Complete.")
print("You can now summarize any day or ask questions interactively.")

LLM Insight Generator Complete.
You can now summarize any day or ask questions interactively.
