<a href="https://colab.research.google.com/github/BhanuPrakashSamoju/gen_ai_architect_program/blob/main/assignments/assignment_01/bhanu_samoju_assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

/content/sample_data

In [None]:
! ls /content/

env  requirements.txt  sample_data


In [1]:
! python3 -m pip install -r /content/requirements.txt

Collecting langgraph==0.6.7 (from -r /content/requirements.txt (line 2))
  Downloading langgraph-0.6.7-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain-openai==0.3.33 (from -r /content/requirements.txt (line 3))
  Downloading langchain_openai-0.3.33-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-community==0.3.29 (from -r /content/requirements.txt (line 4))
  Downloading langchain_community-0.3.29-py3-none-any.whl.metadata (2.9 kB)
Collecting duckduckgo-search==8.1.1 (from -r /content/requirements.txt (line 5))
  Downloading duckduckgo_search-8.1.1-py3-none-any.whl.metadata (16 kB)
Collecting mlflow==3.3.2 (from -r /content/requirements.txt (line 8))
  Downloading mlflow-3.3.2-py3-none-any.whl.metadata (30 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.1.0 (from langgraph==0.6.7->-r /content/requirements.txt (line 2))
  Downloading langgraph_checkpoint-2.1.1-py3-none-any.whl.metadata (4.2 kB)
Collecting langgraph-prebuilt<0.7.0,>=0.6.0 (from langgraph==0.6.7->-r /content

In [2]:
import os
import yfinance as yf
from dotenv import load_dotenv
from typing import List, TypedDict, Annotated
import operator

import mlflow
from langgraph.graph import StateGraph, START, END

from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool
from langchain_openai import AzureChatOpenAI
from pydantic import BaseModel, Field

# --- Load Environment Variables ---
# Make sure to create a .env file with your Azure OpenAI credentials
load_dotenv("/content/env")



True

In [3]:
import json

In [4]:
os.getenv("OPENAI_API_TYPE")

'azure'

In [14]:
import mlflow
mlflow.set_tracking_uri("http://20.75.92.162:5000")

In [32]:
# --- 1. Define the Structured Output Schema using Pydantic ---
class SentimentProfile(BaseModel):
    """Structured sentiment profile for a given company based on recent news."""
    company_name: str = Field(description="The name of the company being analyzed.")
    stock_code: str = Field(description="The stock market ticker symbol for the company.")
    news_summary: str = Field(description="A concise summary of the key news headlines provided.")
    sentiment: str = Field(description="Overall sentiment, classified as 'Positive', 'Negative', or 'Neutral'.")
    people_names: List[str] = Field(description="List of names of people mentioned in the news.")
    places_names: List[str] = Field(description="List of geographic places or locations mentioned.")
    other_companies_referred: List[str] = Field(description="List of other company names mentioned.")
    related_industries: List[str] = Field(description="List of industries related to the news content.")
    market_implications: str = Field(description="A brief analysis of the potential market implications of the news.")
    confidence_score: float = Field(description="A confidence score (0.0 to 1.0) for the sentiment classification.")

# --- 2. Define the State for the Graph ---
# The state is a dictionary that will be passed between nodes in the graph.
# Each field in the state is updated by a node as the graph progresses.
class GraphState(TypedDict):
    company_name: str
    stock_code: str
    news_articles: str
    sentiment_profile: SentimentProfile

In [33]:
# --- 3. Define the Nodes of the Graph ---

def find_stocks(search_query):
    try:
        search_results = yf.Search(search_query) # performing search
        if search_results.quotes:
            filtered_stocks = [
                quote for quote in search_results.quotes if quote['quoteType'] == 'EQUITY' # filtering by 'EQUITY' quoteType
            ]
            return filtered_stocks
        else:
            return []
    except Exception as e:
        print(f"Error during search: {e}")
        return []

def get_stock_code(state: GraphState) -> GraphState:
    """
    It uses the yfinance library to look up the ticker symbol.
    """
    print("---Fetching Stock Code---")
    company_name = state['company_name']
    try:

        search_results = find_stocks(company_name)
        if search_results:
            print(f"Found {len(search_results)} results for {company_name}:")
            # for stock in search_results:
            #   print(f"Name: {stock['longname']}, Ticker: {stock['symbol']}, QuoteType: {stock['quoteType']}")
            top_stock = search_results[0]
            stock_code = top_stock['symbol']
            print(f"Found stock code for {company_name}: {stock_code}")
            return {"stock_code": stock_code}
        else:
            print(f"No stocks found for '{search_term}'.")

        return {"stock_code": stock_code}
    except Exception as e:
        print(f"Error fetching stock code for {company_name}: {e}")
        # If lookup fails, we can fall back to using the company name for news search
        return {"stock_code": company_name}

In [34]:
def fetch_news(state: GraphState) -> GraphState:
    """
    Node 2: Fetches recent news for the company using its stock code.
    It uses the YahooFinanceNewsTool for fetching news.
    """
    print("---Fetching Company News---")
    stock_code = state['stock_code']
    company_name = state['company_name']

    # Using YahooFinanceNewsTool to fetch news.
    news_tool = YahooFinanceNewsTool()

    try:
        # The YahooFinanceNewsTool takes the stock ticker as input
        news = news_tool.run(stock_code)
        print(f"Successfully fetched news for {stock_code}.")
        return {"news_articles": news}
    except Exception as e:
        print(f"Error fetching news for {stock_code}: {e}")
        return {"news_articles": "Could not fetch news."}

In [35]:
def analyze_sentiment(state: GraphState) -> GraphState:
    """
    It calls the Azure OpenAI model and forces it to return a JSON object
    matching the SentimentProfile Pydantic schema.
    """
    print("---Analyzing Sentiment with LLM---")
    company_name = state['company_name']
    stock_code = state['stock_code']
    news_articles = state['news_articles']

    print(f"State Variables: {company_name}, {stock_code}, {news_articles}")

    # Define the prompt template
    prompt_template = """
    You are an expert financial analyst. Your task is to generate a structured sentiment profile
    for the company '{company_name}' ({stock_code}) based on the following news articles.

    Analyze the provided news content and generate a JSON object with the specified fields.

    News Articles:
    "{news_articles}"

    Please provide your analysis in a structured JSON format.
    """

    # Register or load the prompt using MLflow
    prompt_name = "bhanu-sentiment-analysis-prompt"
    try:
        # Try to load the latest version of the prompt
        prompt = mlflow.genai.load_prompt(f"prompts:/{prompt_name}/latest")
        print(f"Loaded existing prompt '{prompt.name}' (version {prompt.version})")
    except Exception:
        # If the prompt doesn't exist, register it
        print(f"Prompt '{prompt_name}' not found. Registering a new one.")
        prompt = mlflow.genai.register_prompt(
            name=prompt_name,
            template=prompt_template,
            commit_message="Prompt for analyzing market sentiment of a company based on news.",
            tags={
                "task": "sentiment analysis",
                "domain": "finance",
                "model_output": "json",
            },
        )
        print(f"Registered new prompt '{prompt.name}' (version {prompt.version})")


    # Initialize the Azure Chat Model
    llm = AzureChatOpenAI(
        azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
        openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
        openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        openai_api_type=os.getenv("OPENAI_API_TYPE"),
        temperature=0,
        # streaming=False,
    )

    structured_llm = llm.with_structured_output(SentimentProfile)

    try:
        # print(f"Prompt is : {prompt.template.format(company_name=company_name, stock_code=stock_code, news_articles=news_articles)}")
        # Invoke the model with the prompt and input variables
        profile = structured_llm.invoke(prompt.template.format(company_name=company_name, stock_code=stock_code, news_articles=news_articles))
        print("Successfully analyzed sentiment.")
        return {"sentiment_profile": profile.model_dump()}
    except Exception as e:
        print(f"Error during sentiment analysis: {e}")
        return {"sentiment_profile": None} # Handle potential errors gracefully

In [36]:
# --- 4. Build the Graph ---
# We define the workflow and connect the nodes in a specific sequence.

# Create a new graph
workflow = StateGraph(GraphState)

# Add the nodes to the graph
workflow.add_node("get_stock_code", get_stock_code)
workflow.add_node("fetch_news", fetch_news)
workflow.add_node("analyze_sentiment", analyze_sentiment)

workflow.add_edge(START, "get_stock_code")
workflow.add_edge("get_stock_code", "fetch_news")
workflow.add_edge("fetch_news", "analyze_sentiment")
workflow.add_edge("analyze_sentiment", END)

# Compile the graph into a runnable object
app = workflow.compile()


In [38]:

company_to_analyze = "Nvidia"
inputs = {"company_name": company_to_analyze}

mlflow.set_experiment("Market Sentiment Analysis")
mlflow.langchain.autolog() # Automatically logs all LangChain components

with mlflow.start_run(run_name=f"Sentiment Analysis for {company_to_analyze}") as run:
    print(f"🚀 Starting sentiment analysis for: {company_to_analyze}")

    # Invoke the graph with the initial input
    final_state = app.invoke(inputs)

    print(final_state)

    result_json = final_state['sentiment_profile']

    print("\n--- Final Sentiment Profile ---")
    print(result_json)

    # Log the final JSON output as an artifact in MLflow
    with open("sentiment_profile.json", "w") as f:
        f.write(json.dumps(result_json, indent=2))
    mlflow.log_artifact("sentiment_profile.json")

print(f"\n✅ Analysis complete for {company_to_analyze}.")
print("📈 Run `mlflow ui` in your terminal to view the trace.")

🚀 Starting sentiment analysis for: Nvidia
---Fetching Stock Code---
Found 4 results for Nvidia:
Found stock code for Nvidia: NVDA
---Fetching Company News---
Successfully fetched news for NVDA.
---Analyzing Sentiment with LLM---
State Variables: Nvidia, NVDA, No news found for company that searched with NVDA ticker.
Prompt 'bhanu-sentiment-analysis-prompt' not found. Registering a new one.


2025/09/17 06:37:48 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for prompt version to finish creation. Prompt name: bhanu-sentiment-analysis-prompt, version 6


Registered new prompt 'bhanu-sentiment-analysis-prompt' (version 6)
Successfully analyzed sentiment.
{'company_name': 'Nvidia', 'stock_code': 'NVDA', 'news_articles': 'No news found for company that searched with NVDA ticker.', 'sentiment_profile': {'company_name': 'Nvidia', 'stock_code': 'NVDA', 'news_summary': 'No news found for company that searched with NVDA ticker.', 'sentiment': 'Neutral', 'people_names': [], 'places_names': [], 'other_companies_referred': [], 'related_industries': [], 'market_implications': 'The lack of news suggests stability in the current market perception of Nvidia, with no immediate catalysts for significant price movement.', 'confidence_score': 0.5}}

--- Final Sentiment Profile ---
{'company_name': 'Nvidia', 'stock_code': 'NVDA', 'news_summary': 'No news found for company that searched with NVDA ticker.', 'sentiment': 'Neutral', 'people_names': [], 'places_names': [], 'other_companies_referred': [], 'related_industries': [], 'market_implications': 'The la