In [None]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input tree and print the full path of every file found.
for folder, _subdirs, found_files in os.walk('/kaggle/input'):
    for found_file in found_files:
        print(os.path.join(folder, found_file))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<h1 style="text-align:center; color:#1f77b4; font-size: 42px;">
  📈 Stock Price Prediction Using LSTM and News Sentiment
</h1>

<p style="text-align:center; font-size: 18px; color:#555;">
  Unlock insights from market data and financial news using Deep Learning and Generative AI. <br>
  This project combines <b>LSTM-based time series forecasting</b> with <b>news sentiment analysis</b> to predict stock prices more intelligently.
</p>

<hr style="height:2px; border-width:0; background-color:#1f77b4;">

<h2 style="color:#8B4513;font-weight:bold">1. Project Overview</h2>
<ul>
  <li>📊 Historical stock data processing and visualization</li>
  <li>🧠 Deep learning with LSTM for price prediction</li>
  <li>📰 News Headlines sentiment analysis and Earning Reports understanding with GenAI</li>
</ul>
<p><strong>Key GENAI capabilities in this Project:</strong></p>
<ol>
  <li>Structured output/controlled generation</li>
  <li>Document understanding</li>
  <li>AI Agent</li>
</ol>


In [None]:
#Kaggle's preinstalled yfinance (v0.2.50) no longer works, so make sure the pinned,
#known-good release is installed before the library is imported.

import os
import subprocess
import sys
from importlib import metadata

#The exact release this notebook expects; the install below is pinned to it so the
#version check further down cannot fail just because a newer release was published.
REQUIRED_YFINANCE_VERSION = "0.2.55"


def _reinstall_yfinance(version):
    """Uninstall any existing yfinance and install exactly `version`."""
    print(f"Reinstalling the yfinance v{version}")
    #`sys.executable -m pip` targets the interpreter running this kernel rather than
    #whichever `pip` happens to be first on PATH.
    subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", "yfinance"])
    subprocess.run([sys.executable, "-m", "pip", "install", f"yfinance=={version}"])


#Only reinstall when the installed distribution is missing or has a different version.
try:
    _installed_version = metadata.version("yfinance")
except metadata.PackageNotFoundError:
    _installed_version = None

if _installed_version != REQUIRED_YFINANCE_VERSION:
    _reinstall_yfinance(REQUIRED_YFINANCE_VERSION)

import yfinance as yf

if yf.__version__ == REQUIRED_YFINANCE_VERSION:
    print("You're good to go")
else:
    #A different version is loaded in this process (e.g. it was imported before the
    #reinstall). Reinstall and terminate the kernel so the next run imports the right one.
    _reinstall_yfinance(REQUIRED_YFINANCE_VERSION)
    print("Restarting the kernel...")
    os._exit(0)

In [None]:
!pip install mlflow --quiet
!pip install langgraph --quiet
!pip install -qU 'langgraph==0.3.21' 'langchain-google-genai==2.1.2' 'langgraph-prebuilt==0.1.7'

In [None]:
import tensorflow as tf
from google import genai
from google.genai import types
import numpy as np
import pandas as pd
from IPython.display import Markdown, HTML, display
import re
import ast
from rich.markdown import Markdown
from rich.console import Console
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
import requests
from sklearn.preprocessing import StandardScaler
from bs4 import BeautifulSoup as bs
import json
from datetime import datetime, timedelta
import mlflow
from mlflow.models.signature import infer_signature
import tempfile
import json
from typing import Annotated, Optional, Literal
from typing_extensions import TypedDict
from langgraph.graph.message import add_messages
from langgraph.graph import StateGraph, START, END
from langchain_google_genai import ChatGoogleGenerativeAI
from IPython.display import Image, display
from pprint import pprint
from langchain_core.messages.ai import AIMessage
from langchain_core.tools import tool
from langgraph.prebuilt import ToolNode
from collections.abc import Iterable
from random import randint
from langchain_core.messages.tool import ToolMessage

In [None]:
#Gemini credentials: read the key from Kaggle's secret store, export it through the
#GOOGLE_API_KEY environment variable and create the google-genai client with it.
from kaggle_secrets import UserSecretsClient

_secrets_client = UserSecretsClient()
GOOGLE_API_KEY = _secrets_client.get_secret("GOOGLE_API_KEY")
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
client = genai.Client(api_key=GOOGLE_API_KEY)

<h2 style="color:#9932CC;font-weight: bold">2. Data Collection & Preparation</h2>
<p style="font-size: 18px">
  Data collection is a crucial step in building a robust stock market prediction model.
  The quality and scope of the data used can significantly impact the model's accuracy. In this section, we gather both historical stock market     data and real-time financial news articles to create a comprehensive dataset for training the prediction model.
</p>
<h3 style="color:#A52A2A">Sources : </h3>
<dl>
    <dt>The Stock data prices</dt>
    <dd>-  collected from 2008 until today with yfinance API</dd>
    <dt>Article News </dt>
    <dd>-  news headlines scraped with bs4 and requests from websites such as finance.yahoo.com</dd>
</dl>
<h3 style="font-size: 18px !important">
    
  The data must be processed before it is fed to the ML model.
  This section also covers the process of cleaning and transforming raw data into a structured and usable format. 
</h3>

In [None]:
#Extracting financial news headlines
def get_yfinance_headlines(ticker):
    """Scrape the latest-news page of finance.yahoo.com for `ticker`.

    Args:
        ticker: Stock ticker symbol inserted into the Yahoo Finance URL (e.g. "NVDA").

    Returns:
        A list of ``(title, date)`` tuples, one per news item that could be parsed.
        ``date`` is the text that follows the "•" separator in the item's publishing
        line, except that ages given in hours, minutes or seconds are collapsed to
        the string ``"today"``. The list is empty when the news list element is not
        found in the page.
    """
    url = f"https://finance.yahoo.com/quote/{ticker}/latest-news/"
    headers = {'User-Agent' : 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    soup = bs(response.text, "html.parser")
    #The class string is tied to the page's current markup; no match means no headlines.
    table = soup.find("ul", class_="stream-items yf-1usaaz9")
    header = []
    if not table:
        return header

    for row in table.find_all("li"):
        try:
            title = row.find("a", class_="subtle-link").get("title")
            publisher_and_date = row.find("div", class_="publishing").get_text(strip=True)
            date = re.findall(r"(?<=•).+", publisher_and_date)[0]
        except (AttributeError, IndexError):
            #List item without the expected link / publishing line / "•" separator: skip it.
            continue
        #Singular forms on purpose: "hour" also matches "hours", whereas the previous
        #pattern "hours" missed "1 hour ago" and let it be read later as "1 day ago".
        if re.search(r"hour|minute|second", date):
            date = "today"
        header.append((title, date))
    return header

In [None]:
#Getting Stock Data with yfinance API
def get_stock_data(ticker, start_date, end_date = None, interval="1d"):
    """Download price history for `ticker` through ``yf.download``.

    When `end_date` is None, today's date formatted as YYYY-MM-DD is used instead.
    Returns whatever ``yf.download`` returns for the requested range and interval.
    """
    range_end = datetime.today().strftime('%Y-%m-%d') if end_date is None else end_date
    return yf.download(ticker, start=start_date, end=range_end, interval=interval)

#Turn a price table into sliding-window samples for model training
#time_step : number of preceding rows used to predict the target of one row
def prepare_data(df, feature_column=['Open', 'Low', 'High',  'Close'], target_column='Close', time_step=30):
    """Build (window, target, date) samples from `df`.

    For every row position i >= time_step, the sample is the `feature_column`
    values of the `time_step` rows before i, the target is `target_column` at
    row i, and the date is the index label of row i.

    Returns:
        Three numpy arrays: the windows, the targets and the index labels.
    """
    target_positions = range(time_step, len(df))
    windows = [df[feature_column].iloc[pos - time_step:pos].values for pos in target_positions]
    targets = [df[target_column].iloc[pos] for pos in target_positions]
    labels = [df.index[pos] for pos in target_positions]
    return np.array(windows), np.array(targets), np.array(labels)

In [None]:
#Preparing the most recent rows as model input for a prediction
def prepare_date_predict(ticker, feature_column=['Open', 'Low', 'High',  'Close'], time_step=30, lookback_days=100):
    """Fetch recent prices for `ticker` and return the last `time_step` rows as one input window.

    Args:
        ticker: Stock ticker symbol passed to ``get_stock_data``.
        feature_column: Columns to extract, in model input order.
        time_step: Number of most recent rows to keep. Defaults to 30, which matches
            the default window length of ``prepare_data``.
        lookback_days: Calendar days of history to download. It has to be large
            enough to contain `time_step` rows; defaults to 100.

    Returns:
        Tuple ``(x, dates)``: the feature rows and their index labels as numpy arrays,
        one entry per kept row.
    """
    end_date = datetime.today().strftime('%Y-%m-%d')
    start_date = (pd.to_datetime(end_date) - pd.Timedelta(days=lookback_days)).strftime('%Y-%m-%d')
    recent_prices = get_stock_data(ticker, start_date, end_date)
    window = recent_prices.tail(time_step)
    x = [window[feature_column].iloc[i].values for i in range(len(window))]
    dates = list(window.index)
    return np.array(x), np.array(dates)

#Run one prediction on a raw window and convert the result back to the original scale
def predictor(best_model, x_scaler, y_scaler, x_pred):
    """Scale `x_pred`, predict with `best_model` and inverse-scale the output.

    `x_pred` is flattened to rows of ``x_pred.shape[-1]`` columns for
    ``x_scaler.transform``, reshaped to a batch of one, passed to
    ``best_model.predict`` and the result is sent through
    ``y_scaler.inverse_transform``.
    """
    flat_scaled = x_scaler.transform(x_pred.reshape(-1, x_pred.shape[-1]))
    single_batch = flat_scaled.reshape(1, x_pred.shape[0], x_pred.shape[1])
    scaled_output = best_model.predict(single_batch)
    return y_scaler.inverse_transform(scaled_output)

<h2 style="color:#556B2F;font-weight: bold">3. RNN Deep Learning Model (LSTM)</h2>
<p style="font-size:17px">
    In this section, we employ a Long-Short-Term-Memory (LSTM) model for predicting stock prices based on historical data. 
    LSTMs are well suited for time-series forecasting because they can capture long-term dependencies in sequential data.
</p>

In [None]:
#XGBRegressor baseline for stock prediction
def fit_and_train_xgb(x_train, y_train, x_test, y_test, dates_test):
    """Grid-search an XGBRegressor, print its test RMSE and plot actual vs. predicted values.

    Each sample is flattened to a single feature vector before fitting. The best
    estimator found by a 3-fold grid search (scored by negative mean squared
    error) predicts the test set. Nothing is returned.
    """
    #One row per sample: collapse all remaining dimensions into features
    flat_train = x_train.reshape(x_train.shape[0], -1)
    flat_test = x_test.reshape(x_test.shape[0], -1)

    #Exhaustive search over every combination of the grid below
    search = GridSearchCV(
        estimator=XGBRegressor(n_estimators=1000, learning_rate=0.1),
        param_grid={
            'n_estimators': [100, 1000],
            'learning_rate': [0.01, 0.05, 0.1, 0.3],
            'max_depth': [3, 5, 7],
            'subsample': [0.8, 1.0],
        },
        cv=3,
        scoring="neg_mean_squared_error",
        n_jobs=-1,
        verbose=1,
    )
    search.fit(flat_train, y_train)

    #Evaluate the best estimator of the search on the held-out data
    predictions = search.best_estimator_.predict(flat_test)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    print(f"RMSE (XGBRegressor) : {rmse}")

    plt.figure(figsize=(18, 6))
    for series, label, color, marker in (
        (y_test, 'Actual', 'blue', 'o'),
        (predictions, 'Predicted', 'orange', 'x'),
    ):
        plt.plot(dates_test, series, label=label, color=color, marker=marker)
    plt.title('Stock Price Prediction')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.grid(True)
    plt.legend()
    plt.show()

In [None]:
#Using RNN Model with LSTM layers
def fit_and_plot(x_train, y_train, x_test, y_test, dates_test, learning_rate, run_name="lstm_run_1"):
    """Train a stacked-LSTM regressor inside an MLflow run, evaluate it and plot the test predictions.

    Features and targets are standardised with scalers fitted on the training
    split only. Training runs for up to 200 epochs; early stopping and the
    checkpoint both monitor the training MAE (no validation data is passed to
    ``fit``). The learning rate, the RMSE/MAE of the trained model and the RMSE
    of the checkpointed model are logged to MLflow, and the trained model is
    logged as an artifact.

    Args:
        x_train: Training windows; ``x_train.shape`` is unpacked as
            (samples, time_steps, features).
        y_train: Training targets; reshaped to one column before scaling.
        x_test: Test windows with the same time_steps/features layout.
        y_test: Test targets.
        dates_test: X-axis values for the plot, one per test sample.
        learning_rate: Learning rate of the Adam optimizer.
        run_name: Name of the MLflow run.

    Returns:
        Tuple ``(best_model, x_scaler, y_scaler)``: the model reloaded from the
        ``best_model.keras`` checkpoint and the fitted feature and target scalers.
    """
    #Experiment tracking, logging model parameters, metrics and saving the model
    with mlflow.start_run(run_name=run_name):
        num_train_samples, time_steps, num_features = x_train.shape

        #Model Layers
        model = tf.keras.models.Sequential([
            tf.keras.layers.LSTM(258, return_sequences=True, input_shape=(time_steps, num_features)),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.LSTM(64),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(1)
        ])

        #Early stop monitoring the training MAE
        early_stop = tf.keras.callbacks.EarlyStopping(monitor='mae', patience=50, restore_best_weights=True)

        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

        #Saving the model with the lowest training MAE seen so far
        checkpoint = tf.keras.callbacks.ModelCheckpoint('best_model.keras', monitor='mae', save_best_only=True, mode='min', verbose=1)

        model.compile(loss="mean_squared_error", optimizer=optimizer, metrics=["mae"])

        #Standard scaling: the scalers work on 2-D data, so windows are flattened to
        #(rows, features) for scaling and then restored to (samples, time_steps, features)
        x_scaler = StandardScaler()
        x_train_scaled = x_scaler.fit_transform(x_train.reshape(-1, num_features))
        x_train_scaled = x_train_scaled.reshape(num_train_samples, time_steps, num_features)
        x_test_scaled = x_scaler.transform(x_test.reshape(-1, num_features))
        x_test_scaled = x_test_scaled.reshape(x_test.shape[0], time_steps, num_features)
        y_scaler = StandardScaler()
        y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1, 1))
        y_test_scaled = y_scaler.transform(y_test.reshape(-1, 1))

        #Log learning_rate parameter with MLflow
        mlflow.log_param("learning_rate", learning_rate)

        #fitting the data to the model
        model.fit(x_train_scaled, y_train_scaled, epochs=200, batch_size=16, callbacks=[early_stop, checkpoint], verbose=0)

        #Predict with the trained model and with the checkpointed best model,
        #then bring targets and predictions back to price scale
        y_test = y_scaler.inverse_transform(y_test_scaled)
        y_pred = y_scaler.inverse_transform(model.predict(x_test_scaled))
        best_model = tf.keras.models.load_model('best_model.keras')
        best_y_pred = y_scaler.inverse_transform(best_model.predict(x_test_scaled))

        #Evaluation
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mae = np.mean(np.abs(y_test - y_pred))
        best_rmse = np.sqrt(mean_squared_error(y_test, best_y_pred))

        #Log Metrics (best_rmse belongs to the model that is returned)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("best_rmse", best_rmse)

        #Printing the RMSE scores
        print(f"RMSE : {rmse:.4f}")
        print(f"Best RMSE: {best_rmse:.4f}")
        plt.figure(figsize=(18, 6))
        plt.plot(dates_test, y_pred, label="Predicted", marker='o')
        plt.plot(dates_test, y_test, label="Actual", marker='x')
        plt.title('Stock Price Prediction')
        plt.xlabel('Date')
        #Was a second plt.xlabel call, which replaced 'Date' and left the y axis unlabeled
        plt.ylabel('Price')
        plt.grid(True)
        plt.legend()
        plt.show()

        #Save model to MLFlow: write it to a temporary .keras file and log the reloaded model
        with tempfile.TemporaryDirectory() as tmp_dir:
            model_path = os.path.join(tmp_dir, "model.keras")
            model.save(model_path)
            mlflow.keras.log_model(tf.keras.models.load_model(model_path), artifact_path="model")
        return best_model, x_scaler, y_scaler

<h2 style="color:#191970; font-weight: bold">4. Document understanding</h2>
<p style="font-size:17px; font-family: Arial">Document understanding involves extracting valuable information from unstructured text, such as financial reports and annual earnings statements, to identify key financial figures.</p>

In [None]:
#Analyze financial/Annual report
def analyze_report(url):
    """Ask Gemini for a structured financial analysis of the report behind `url`.

    The request consists of a fixed analyst instruction followed by the link.
    Returns the ``parts`` of the first response candidate.

    NOTE(review): only the URL text is sent; this function does not download or
    attach the document itself. Confirm the model can actually access the link.
    """
    instructions = """
    You are an expert financial analyst.
    
    Your task is to analyze the following financial report from the link provided ad the end and provide a summary that includes:
    
    1. Key financial figures (e.g. revenue, net income, profit margins, EBITDA, cash flow, assets, liabilities, etc.).
    2. Trends or changes compared to previous periods (if available).
    3. Financial ratios (e.g. current ratio, debt-to-equity, return on equity).
    4. Identified risks or red flags (e.g. debt levels, declining revenue, operational issues).
    5. A brief overall financial health evaluation of the company.
    
    Please base your analysis only and only on the content of the provided report or data. """
    full_prompt = f"{instructions}\nThis is the links : {url}"

    #Using genai to analyze the report
    reply = client.models.generate_content(model='gemini-1.5-pro', contents=full_prompt)
    first_candidate = reply.candidates[0]
    return first_candidate.content.parts

<h2 style="color:#800000; font-weight: bold">5. Sentiment scoring with GenAI</h2>
<p style="font-size:17px; font-family: Arial">Sentiment scoring plays a crucial role in understanding market sentiment, its impact on stock prices, and how people react to the news.</p>

In [None]:
def _headline_age_in_days(date_label):
    """Convert a scraped relative-date label into a number of days before today.

    Returns 0 for 'today' and for ages given in hours/minutes/seconds, 1 for
    'yesterday', otherwise the first number in the label scaled by its unit
    (weeks exactly; months and years approximated as 30 and 365 days).
    Returns None when the label contains nothing interpretable.
    """
    label = date_label.strip().lower()
    if label == 'today' or re.search(r"hour|minute|second", label):
        return 0
    if label == 'yesterday':
        return 1
    number = re.search(r"\d+", label)
    if number is None:
        return None
    days = int(number.group())
    if "week" in label:
        days *= 7
    elif "month" in label:
        days *= 30
    elif "year" in label:
        days *= 365
    return days


def sentiment_score(title_and_date, ticker):
    """Group headlines by publication day and ask Gemini for one sentiment verdict per day.

    Args:
        title_and_date: Iterable of ``(title, date_label)`` pairs, where the label is
            a relative date such as 'today', 'yesterday' or '3 days ago'.
        ticker: Ticker symbol mentioned in the prompt.

    Returns:
        A list with one entry per day whose response could be parsed as JSON. Each
        entry is a single-element list holding the parsed dict, with its ``"date"``
        key set to the day in YYYY-MM-DD form. Days are in first-seen order.
    """
    model_config = types.GenerateContentConfig(
        temperature=0.1,
        top_p=1,
        max_output_tokens=300
    )

    #Bucket the scraped titles by calendar day. Each title is stored exactly once
    #(today's titles used to be appended twice).
    today = pd.to_datetime(datetime.today().strftime('%Y-%m-%d'))
    news_dict = {}
    for title, date_label in title_and_date:
        days_ago = _headline_age_in_days(date_label)
        if days_ago is None:
            #Unknown label: skip this headline instead of failing the whole batch.
            print(f"Skipping headline with unrecognised date '{date_label}': {title}")
            continue
        day = (today - pd.Timedelta(days=days_ago)).strftime('%Y-%m-%d')
        news_dict.setdefault(day, []).append(title)
    sentiment_results = []

    #Using GenAI to find the sentiment score, keywords and sentiment_summary
    for date, news in news_dict.items():
        headlines = "\n".join([f"- {h}" for h in news])
        #The date is quoted in the template so that the requested output is valid JSON.
        zero_shot_prompt = f"""Analyze the sentiment of this lists of financial news headline about {ticker}. 
        Respond ONLY in this dictionary format:
        {{
          "sentiment": "positive" | "negative" | "neutral",
          "confidence": float (between 0.0 and 1.0),
          "keywords": ["keyword1", "keyword2", ...], 
          "sentiment_summary" : "Short explanation of the sentiment based on keywords or tone in the headlines", 
          "date": "{date}"
        }}
        Headline: {headlines}"""
        response = client.models.generate_content(
            model='gemini-2.0-flash',
            config = model_config,
            contents=zero_shot_prompt
        )
        #response.text may be None; an empty string then fails JSON parsing below and is reported.
        raw_text = (response.text or "").strip()
        #Strip an optional Markdown code fence around the JSON payload.
        raw_text = re.sub(r"```json\s*|\s*```", "", raw_text)
        try:
            parsed = json.loads(raw_text)
            parsed["date"] = date
        except (json.JSONDecodeError, TypeError) as e:
            #TypeError covers valid JSON that is not an object (e.g. a list).
            print(f"Failed to parse JSON for date {date}: \n{response.text}\nError: {e}")
            continue
        sentiment_results.append([parsed])
    return sentiment_results

<h2 style="color:#800080; font-weight: bold">6. AI Agent</h2>
<p style="font-size:17px; font-family: Arial">In this Section, we are implementing the use of AI Agents. 
The Agent will answer questions like "Why is the Stock up today?"</p>

In [None]:
class QuestionState(TypedDict): 
    """State representing the user's stock-related question conversation."""

    #Conversation history between the user and the Stockbot; annotated with
    #langgraph's add_messages so message updates go through that reducer
    messages: Annotated[list, add_messages]

    #Ticker symbol of the stock being discussed (e.g. 'NVDA', 'AAPL'), if any
    stock_ticker: Optional[str]

    #Category of the question being asked; must be one of the literals below
    question_type: Literal[
        "price_change_change_reason", #e.g. "Why is the stock down?"
        "forecast", #e.g. "What will be the closing price of today?"
        "sentiment", #e.g. "How is the market reacting?"
        "financials", #e.g. "How did they perform last year?"
    ]

    #Direction of the stock movement mentioned in the question, if any
    direction: Optional[Literal["up", "down"]]

    #Disabled field, not part of the state:
    #date_mentioned: Optional["date"]

    #Flag indicating that the conversation is over; the routing functions check
    #it to end the graph
    finished: bool

#System instruction for the chatbot: its role, the tools it may call and the
#rules for what is permitted for discussion. It is a ("system", text) pair that
#is put in front of the conversation history on every model call.
STOCKBOT_SYSINT = (
    "system",
    "You are StockBot, a smart financial assistant that helps users understand stock prices, market movement, and news sentiment.\n\n"

    "Your job is to answer questions like:\n"
    "- 'Why is NVDA up today?'\n"
    "- 'What’s the forecast for AAPL?'\n"
    "- 'Give me the latest on TSLA.'\n\n"

    #Every tool line ends with a single newline and the list is closed by a blank
    #line; previously the last tool line ran straight into "Instructions:".
    "You have access to these tools:\n"
    "- `get_stock_info(ticker: str)`: Gets live market data and predicted closing price.\n"
    "- `reason_for_price_move(ticker: str)`: Analyzes today's financial headlines to explain sentiment behind a stock’s movement.\n"
    "- `stock_price_prediction()`: Returns today's predicted closing stock price.\n"
    "- `annual_report_analyzer(url)`: Returns analyzed Annual reports of the stocks.\n\n"

    "Instructions:\n"
    "1. If a user asks why a stock is moving, use `reason_for_price_move(ticker: str)` with the correct ticker.\n"
    "2. If the user is asking for a forecast or prediction, use `stock_price_prediction()`.\n"
    "3. Always mention whether your information comes from live data, prediction models, or news analysis.\n"
    "4. Only answer questions related to stocks or finance. If the question is off-topic, politely redirect.\n"
    "5. When the user’s question is fully answered, mark the state as `finished = True`.\n\n"

    "Do not make up numbers. Always use tool outputs for prices or sentiment.\n"
    "Keep responses helpful, short, and clearly sourced (e.g., 'based on today’s news' or 'our price prediction model')."
)


#Greeting emitted by the chatbot node when the conversation has no messages yet
WELCOME_MSG = "Hello There! I'm Stockbot, ready to answer your questions regarding the stock. Type 'q' to quit!"

In [None]:
@tool
def get_stock_info(ticker: str): 
    """Fetch the latest stock market data for a given ticker symbol (e.g., NVDA, AAPL)."""
    #The docstring above is left unchanged because the @tool decorator uses it as the
    #tool description; implementation notes therefore live in comments.
    #
    #Returns a multi-line text summary (price, change, volume, market cap, P/E and the
    #model's predicted close), or an error sentence if anything fails.
    #Relies on the module-level best_model, x_scaler and y_scaler created by the
    #training cell, which must have run before this tool is invoked.

    def format_number(num, prefix=""):
        """Abbreviate large numbers with T/B/M suffixes; non-numeric values pass through."""
        if not isinstance(num, (int, float)):
            return num
        if num >= 1e12:
            return f"{prefix}{num/1e12:.2f}T"
        if num >= 1e9:
            return f"{prefix}{num/1e9:.2f}B"
        if num >= 1e6:
            return f"{prefix}{num/1e6:.2f}M"
        return f"{prefix}{num:,.2f}"

    def two_decimals(value):
        """Format numbers with two decimals; anything else (e.g. 'N/A') is shown as text."""
        return f"{value:.2f}" if isinstance(value, (int, float)) else str(value)

    try:
        #Model prediction; kept inside the try so a failed download or prediction is
        #reported as a tool error message instead of an unhandled exception.
        x_pred, _ = prepare_date_predict(ticker)
        predicted_close = predictor(best_model, x_scaler, y_scaler, x_pred)[0].item()

        #Fetch real time data
        stock = yf.Ticker(ticker)
        info = stock.info

        #Extract key metrics (N/A if missing)
        name = info.get('shortName', ticker.upper())
        price = info.get('currentPrice', 'N/A')
        change = info.get('regularMarketChange', 'N/A')
        change_percent = info.get('regularMarketChangePercent', 'N/A')
        volume = info.get('regularMarketVolume', 'N/A')
        market_cap = info.get('marketCap', 'N/A')
        pe_ratio = info.get('trailingPE', 'N/A')

        #Volume is a share count, so it gets no currency prefix; market cap does.
        volume_str = format_number(volume)
        market_cap_str = format_number(market_cap, prefix="$")

        return f"""
        LIVE STOCK DATA ({ticker.upper()} - {name}):
        - Current Price: ${two_decimals(price)}
        - Change: ${two_decimals(change)} ({two_decimals(change_percent)}%)
        - Volume: {volume_str}
        - Market Cap: {market_cap_str}
        - P/E Ratio: {two_decimals(pe_ratio)}
        - Predicted closing price as of today : ${predicted_close:.2f}
        """
    except Exception as e:
        return f"Error fetching data for {ticker.upper()}: {str(e)}. Check if the ticker is valid."
        

In [None]:
@tool
def stock_price_prediction(): 
    """predict the closing price as of today"""
    #Reads the module-level today_close_predicted and reports its first element as a price.
    closing_estimate = today_close_predicted[0].item()
    return f"The predicted closing stock price for today is ${closing_estimate:.2f}."

@tool(args_schema={"ticker": {"type": "string", "description": "Stock ticker symbol like NVDA or AAPL"}})
def reason_for_price_move(ticker): 
    """Analyze today's headlines to determine the sentiment behind the stock's movement.
    Returns a summary of sentiment, confidence, and keywords"""
    #The docstring above is left unchanged because the @tool decorator uses it as the
    #tool description.

    headlines = get_yfinance_headlines(ticker)
    today_headlines = [(title, date) for title, date in headlines if date == 'today']

    if not today_headlines:
        return f"No recent news found today for {ticker.upper()}."

    #Score only today's headlines: one model call instead of one per scraped day, and
    #the reported result is guaranteed to be today's.
    sentiment_results = sentiment_score(today_headlines, ticker)
    if not sentiment_results:
        #sentiment_score drops responses it cannot parse; without this guard the
        #variables used below would be unbound.
        return f"Sentiment analysis for {ticker.upper()} is unavailable: the model response could not be parsed."

    #Each entry is a single-element list wrapping the parsed dict.
    result = sentiment_results[0][0]
    sentiment = str(result.get("sentiment", "N/A"))
    try:
        confidence = float(result.get("confidence", 0.0))
    except (TypeError, ValueError):
        #Model returned something non-numeric; fall back to the default.
        confidence = 0.0
    summary = result.get("sentiment_summary", "")
    keywords = ", ".join(str(keyword) for keyword in (result.get("keywords") or []))
    date = result.get("date", "N/A")

    response = f"Sentiment analysis for {ticker.upper()} based on recent news:\n"
    response += f"""
            🗓️ Date: {date}
            - Sentiment: {sentiment.title()} (Confidence: {confidence:.2f})
            - Keywords: {keywords}
            - Summary: {summary}
            """
    return response.strip()

@tool(args_schema={"url": {"type": "string", "description": "Link to the Annual/Financial report"}})
def annual_report_analyzer(url): 
    """Analyze the financial report """
    #Thin tool wrapper: hands the link to analyze_report and returns its result unchanged.
    report_analysis = analyze_report(url)
    return report_analysis

In [None]:
# Passed as the config argument when the compiled graph is invoked
config={"recursion_limit": 100}
# Chat model that drives the agent
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")


# Combine all tools for LLM binding AND tool node
all_tools = [get_stock_info, stock_price_prediction, reason_for_price_move, annual_report_analyzer]
# Model variant that has the tools bound, so it can request calls to them
llm_with_tools = llm.bind_tools(all_tools)
tool_node = ToolNode(all_tools)  # Graph node built from the same tool list


def human_node(state: QuestionState): 
    """Display the last model message to the user, and receive the user's input.

    Prints the content of the most recent message, reads one line from the
    console and returns the state merged with that line as a new user message.
    When the input is a quit word, ``finished`` is set to True in the result.
    """
    last_msg = state["messages"][-1]
    print("Model:", last_msg.content)
    user_input = input("User: ")

    update = {"messages": [("user", user_input)]}

    #Inputs for quit: compared case-insensitively and without surrounding whitespace.
    #The set previously held "goodbye " with a trailing space, which a typed
    #"goodbye" never matched.
    if user_input.strip().lower() in {"q", "quit", "exit", "goodbye"}:
        update["finished"] = True

    #Merge into a new dict instead of mutating the state that was passed in.
    return state | update

def chatbot_with_tools(state: QuestionState): 
    """Produce the chatbot's next message.

    With an existing conversation, the tool-enabled model is invoked on the
    system instruction followed by the history; with an empty history the
    welcome message is used. The result is the state with ``finished``
    defaulting to False and the new message as the ``messages`` update.
    """
    history = state["messages"]
    if not history:
        #Nothing said yet: open the conversation with the greeting.
        reply = AIMessage(content=WELCOME_MSG)
    else:
        reply = llm_with_tools.invoke([STOCKBOT_SYSINT] + history)
    return {"finished": False} | state | {"messages": [reply]}

def maybe_exit_human_node(state: QuestionState) -> Literal["chatbot", "__end__"]: 
    """Route after the human node: end the graph once ``finished`` is set, else go to the chatbot."""
    return END if state.get("finished", False) else "chatbot"

def maybe_route_to_tools(state: QuestionState) -> Literal["tools", "human", "__end__"]: 
    """Route between human or tool nodes, depending if a tool call is made.

    Returns END when the state is marked finished, "tools" when the last message
    requests at least one tool known to ``tool_node``, and "human" otherwise.
    The return annotation lists "__end__" as well, matching
    ``maybe_exit_human_node``, because this function can return END.

    Raises:
        ValueError: if the state contains no messages.
    """
    if not (msgs := state.get("messages", [])):
        raise ValueError(f"No messages found when parsing state: {state}")

    if state.get("finished", False):
        return END

    #When the chatbot returns tool_calls for a registered tool, route to the "tools" node.
    requested_calls = getattr(msgs[-1], "tool_calls", None) or []
    if any(call["name"] in tool_node.tools_by_name for call in requested_calls):
        return "tools"
    return "human"


#Building a new graph
graph_builder = StateGraph(QuestionState)

#Add the chatbot, human and tool nodes to the graph
graph_builder.add_node("chatbot", chatbot_with_tools)
graph_builder.add_node("human", human_node)
graph_builder.add_node("tools", tool_node)

#make chatbot the entrypoint
graph_builder.add_edge(START, "chatbot")

#After the tools node has run, control always returns to the chatbot
graph_builder.add_edge("tools", "chatbot")

#From the human node: end the graph or go back to the chatbot
graph_builder.add_conditional_edges("human", maybe_exit_human_node)
#From the chatbot: go to the tools node or to the human node
graph_builder.add_conditional_edges("chatbot", maybe_route_to_tools)

graph_with_menu = graph_builder.compile()

<h2 style="color:#8B0000; font-weight: bold">7. Deployment</h2>
<p style="font-size:17px; font-family: Arial">In this section, we will focus on deploying the entire stock market prediction system, integrating all the components discussed earlier. This includes the final model, data processing pipelines, and AI agents, ensuring they operate seamlessly in a production environment. The deployment process involves setting up cloud infrastructure, automating data collection and analysis, and creating a user-friendly interface for real-time stock predictions. By the end of this section, the system will be fully operational and capable of making live predictions based on incoming data.
</p>

In [None]:
#Creating training and testing dataset
ticker = "NVDA"
start_date = "2008-01-01"
end_date = datetime.today().strftime('%Y-%m-%d')
df = get_stock_data(ticker, start_date, end_date)
x, y, dates = prepare_data(df)
#Hold out the 30 most recent samples, in chronological order (shuffle=False).
#An integer test_size is an absolute sample count; the former fraction 30/len(x)
#could round up to 31 samples because of floating-point error.
x_train, x_test, y_train, y_test, dates_train, dates_test = train_test_split(x, y, dates, test_size=30, shuffle=False)

#fit_and_train_xgb(x_train, y_train, x_test, y_test, dates_test)
print("\n\nTraining the model takes several minutes... \nPlease Wait..")
best_model, x_scaler, y_scaler = fit_and_plot(x_train, y_train, x_test, y_test, dates_test, 0.0009, run_name="lstm_lr_001")

#Preparing the latest data and predicting today's closing price. The tools defined
#earlier read best_model, x_scaler, y_scaler and today_close_predicted from here.
x_pred, dates_pred= prepare_date_predict(ticker)
today_close_predicted = predictor(best_model, x_scaler, y_scaler, x_pred)

In [None]:
#Building a JSON report from the latest prediction and the news sentiment
headlines = get_yfinance_headlines(ticker)
sentiment_json = sentiment_score(headlines, ticker)
model_name="LSTM"
time_steps=30
#NOTE(review): hard-coded RMSE from an earlier run; fit_and_plot prints the current
#value but does not return it, so this number can be stale.
rmse=5.5476
predicted_price = today_close_predicted[0].item()
prediction_date = datetime.today().strftime('%Y-%m-%d')

#sentiment_score returns one single-element list per day and drops unparsable
#responses, so the result can be empty; fall back to placeholders in that case.
latest_sentiment = sentiment_json[0][0] if sentiment_json else {}
sentiment = latest_sentiment.get("sentiment", "N/A")
sentiment_summary = latest_sentiment.get("sentiment_summary", "N/A")
keywords = latest_sentiment.get("keywords", [])

#The date range comes from the training cell instead of a fixed, outdated string.
notes = f"Model trained on data from {start_date} to {end_date}. Sentiment derived from financial news articles."
report = {
    "ticker": ticker,
    "date_generated": datetime.today().strftime('%Y-%m-%d'),
    "model_info": {
        "model_name": model_name,
        "time_steps": time_steps,
        "rmse": rmse,
    },
    "latest_prediction": {
        "date": prediction_date,
        "predicted_close_price": predicted_price,
    },
    "sentiment_analysis": {
        "overall_sentiment": sentiment,
        "summary": sentiment_summary,
        "keywords": keywords
    },
    "notes": notes
}

print(json.dumps(report, indent=4))

with open("stock_report_14-04.json", "w") as f: 
    json.dump(report, f, indent=4)

    
#Uncomment this line to manually ask question to the stockbot
#graph_with_menu.invoke({"messages": []}, config)

<h2 style="color:#7CFC00; font-weight: bold">8. Demo</h2>
<p>Demo run of this program: it runs end-to-end without errors and without manual input from the user.</p>

In [None]:
#Scripted user turns for the demo conversation, in the order they are asked
questions = [
    "Hi, what can you do?",
    "Gives me NVDA Stock Info",
    "Why is the NVDA stock down today?",
    "Analyze Annual report for me",
    "https://s201.q4cdn.com/141608511/files/doc_financials/2024/ar/NVIDIA-2024-Annual-Report.pdf",
    "why is NVDA up/down today?",
]


def question_generator(questions):
    """Yield every scripted question in order, followed by a final "exit"."""
    yield from questions
    # Closing quit word so the conversation eventually ends
    yield "exit"


demo_inputs = question_generator(questions)

In [None]:
# Passed as the config argument when the compiled graph is invoked below
config={"recursion_limit": 100}
# Chat model that drives the agent
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")


# Combine all tools for LLM binding AND tool node
all_tools = [get_stock_info, stock_price_prediction, reason_for_price_move, annual_report_analyzer]
# Model variant that has the tools bound, so it can request calls to them
llm_with_tools = llm.bind_tools(all_tools)
tool_node = ToolNode(all_tools)  # Graph node built from the same tool list


def human_node(state: QuestionState): 
    """Display the last model message and feed the next scripted demo input as the user's turn.

    Prints the content of the most recent message, takes the next entry from
    ``demo_inputs`` and returns the state merged with it as a new user message.
    When the input is a quit word, ``finished`` is set to True in the result.
    """
    last_msg = state["messages"][-1]
    print("Model:", last_msg.content)
    #Fall back to "exit" once the script is exhausted (e.g. when this cell is run
    #again without recreating demo_inputs) instead of raising StopIteration.
    user_input = next(demo_inputs, "exit")
    print("User :", user_input)

    update = {"messages": [("user", user_input)]}

    #Inputs for quit: compared case-insensitively and without surrounding whitespace.
    #The set previously held "goodbye " with a trailing space, which a plain
    #"goodbye" never matched.
    if user_input.strip().lower() in {"q", "quit", "exit", "goodbye"}:
        update["finished"] = True

    #Merge into a new dict instead of mutating the state that was passed in.
    return state | update

def chatbot_with_tools(state: QuestionState): 
    """Produce the chatbot's next message.

    With an existing conversation, the tool-enabled model is invoked on the
    system instruction followed by the history; with an empty history the
    welcome message is used. The result is the state with ``finished``
    defaulting to False and the new message as the ``messages`` update.
    """
    conversation = state["messages"]
    next_message = (
        llm_with_tools.invoke([STOCKBOT_SYSINT] + conversation)
        if conversation
        else AIMessage(content=WELCOME_MSG)
    )
    return {"finished": False} | state | {"messages": [next_message]}

def maybe_exit_human_node(state: QuestionState) -> Literal["chatbot", "__end__"]: 
    """Route after the human node: end the graph once ``finished`` is set, else go to the chatbot."""
    conversation_over = state.get("finished", False)
    if not conversation_over:
        return "chatbot"
    return END

def maybe_route_to_tools(state: QuestionState) -> Literal["tools", "human", "__end__"]: 
    """Route between human or tool nodes, depending if a tool call is made.

    Returns END when the state is marked finished, "tools" when the last message
    requests at least one tool known to ``tool_node``, and "human" otherwise.
    The return annotation lists "__end__" as well, matching
    ``maybe_exit_human_node``, because this function can return END.

    Raises:
        ValueError: if the state contains no messages.
    """
    if not (msgs := state.get("messages", [])):
        raise ValueError(f"No messages found when parsing state: {state}")

    if state.get("finished", False):
        return END

    #When the chatbot returns tool_calls for a registered tool, route to the "tools" node.
    requested_calls = getattr(msgs[-1], "tool_calls", None) or []
    if any(call["name"] in tool_node.tools_by_name for call in requested_calls):
        return "tools"
    return "human"


#Building a new graph
graph_builder = StateGraph(QuestionState)

#Add the chatbot, human and tool nodes to the graph
graph_builder.add_node("chatbot", chatbot_with_tools)
graph_builder.add_node("human", human_node)
graph_builder.add_node("tools", tool_node)

#make chatbot the entrypoint
graph_builder.add_edge(START, "chatbot")

#After the tools node has run, control always returns to the chatbot
graph_builder.add_edge("tools", "chatbot")

#From the human node: end the graph or go back to the chatbot
graph_builder.add_conditional_edges("human", maybe_exit_human_node)
#From the chatbot: go to the tools node or to the human node
graph_builder.add_conditional_edges("chatbot", maybe_route_to_tools)

graph_with_menu = graph_builder.compile()
#Run the demo conversation, starting from an empty message history
graph_with_menu.invoke({"messages": []}, config)