<a href="https://colab.research.google.com/github/BhanuPrakashSamoju/gen_ai_architect_program/blob/main/assignments/assignment_01/bhanu_samoju_assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

/content/sample_data

In [None]:
# List /content to confirm the `env` and `requirements.txt` files used by the cells below are present.
! ls /content/

env  requirements.txt  sample_data


In [None]:
! python3 -m pip install -r /content/requirements.txt



In [None]:
import os
import yfinance as yf
from dotenv import load_dotenv
from typing import List, TypedDict, Annotated
import operator

import mlflow
from langgraph.graph import StateGraph, START, END

from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool
from langchain_openai import AzureChatOpenAI
from pydantic import BaseModel, Field

# --- Load Environment Variables ---
# The Azure OpenAI credentials are read from the dotenv-style file `/content/env`
# (the file is named `env`, not `.env`). It should define the variables that are read
# later through os.getenv: OPENAI_API_TYPE, AZURE_OPENAI_DEPLOYMENT,
# AZURE_OPENAI_API_VERSION, AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT.
load_dotenv("/content/env")

True

In [None]:
import json

In [None]:
# Show which pydantic version is installed; the SentimentProfile schema below is built on pydantic.
!pip show pydantic

Name: pydantic
Version: 2.11.7
Summary: Data validation using Python type hints
Home-page: https://github.com/pydantic/pydantic
Author: 
Author-email: Samuel Colvin <s@muelcolvin.com>, Eric Jolibois <em.jolibois@gmail.com>, Hasan Ramezani <hasan.r67@gmail.com>, Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com>, Terrence Dorsey <terry@pydantic.dev>, David Montague <david@pydantic.dev>, Serge Matveenko <lig@countzero.co>, Marcelo Trylesinski <marcelotryle@gmail.com>, Sydney Runkle <sydneymarierunkle@gmail.com>, David Hewitt <mail@davidhewitt.io>, Alex Hall <alex.mojaki@gmail.com>, Victorien Plot <contact@vctrn.dev>
License: 
Location: /usr/local/lib/python3.12/dist-packages
Requires: annotated-types, pydantic-core, typing-extensions, typing-inspection
Required-by: albumentations, confection, fastapi, google-adk, google-cloud-aiplatform, google-genai, google-generativeai, gradio, langchain, langchain-core, langgraph, langsmith, mcp, mlflow-skinny, mlflow-tracing, openai,

In [None]:
# Display the OPENAI_API_TYPE value from the environment (quick check that the env file was picked up).
os.getenv("OPENAI_API_TYPE")

'azure'

In [None]:
# --- 1. Define the Structured Output Schema using Pydantic ---
class SentimentProfile(BaseModel):
    """Structured sentiment profile for a given company based on recent news."""

    # NOTE: the class docstring and every Field description are emitted as part of the
    # model's schema, so their wording is runtime text rather than commentary.

    # --- Which company the profile is about ---
    company_name: str = Field(
        description="The name of the company being analyzed.",
    )
    stock_code: str = Field(
        description="The stock market ticker symbol for the company.",
    )

    # --- What the news said and how it reads ---
    news_summary: str = Field(
        description="A concise summary of the key news headlines provided.",
    )
    sentiment: str = Field(
        description="Overall sentiment, classified as 'Positive', 'Negative', or 'Neutral'.",
    )

    # --- Entities extracted from the news ---
    people_names: List[str] = Field(
        description="List of names of people mentioned in the news.",
    )
    places_names: List[str] = Field(
        description="List of geographic places or locations mentioned.",
    )
    other_companies_referred: List[str] = Field(
        description="List of other company names mentioned.",
    )
    related_industries: List[str] = Field(
        description="List of industries related to the news content.",
    )

    # --- Interpretation ---
    market_implications: str = Field(
        description="A brief analysis of the potential market implications of the news.",
    )
    confidence_score: float = Field(
        description="A confidence score (0.0 to 1.0) for the sentiment classification.",
    )

# --- 2. Define the State for the Graph ---
# The state is a dictionary that will be passed between nodes in the graph.
# Each field in the state is updated by a node as the graph progresses.
class GraphState(TypedDict):
    company_name: str
    stock_code: str
    news_articles: str
    sentiment_profile: SentimentProfile

In [None]:
# --- 3. Define the Nodes of the Graph ---

def find_stocks(search_query):
    """Search Yahoo Finance and keep only the equity matches.

    Args:
        search_query: Free-text company name or ticker passed to ``yf.Search``.

    Returns:
        list: The quote dicts from ``yf.Search(search_query).quotes`` whose
        ``quoteType`` is ``'EQUITY'``, in the order the search returned them.
        An empty list when there are no quotes or when the search raises.
    """
    try:
        search_results = yf.Search(search_query)  # performing search
        # `.get` rather than `quote['quoteType']`: one quote without that key used to
        # raise KeyError, which the handler below turned into an empty result and
        # thereby threw away every valid match as well.
        return [
            quote
            for quote in (search_results.quotes or [])
            if quote.get('quoteType') == 'EQUITY'
        ]
    except Exception as e:
        # Best-effort lookup: report the failure and let the caller apply its fallback.
        print(f"Error during search: {e}")
        return []

def get_stock_code(state: GraphState) -> GraphState:
    """
    Node 1: Resolves the ticker symbol for the company named in the state.

    The company name is looked up with ``find_stocks`` and the symbol of the
    first equity match is used.

    Args:
        state: Graph state; only ``company_name`` is read.

    Returns:
        A partial state update ``{"stock_code": ...}`` holding the ticker symbol,
        or the company name itself when nothing matched or the lookup failed.
    """
    print("---Fetching Stock Code---")
    company_name = state['company_name']
    try:
        search_results = find_stocks(company_name)
        if not search_results:
            # Explicit fallback. This branch previously referenced `search_term` (a global
            # from another cell) and an unassigned `stock_code`, so it only reached the
            # fallback by raising into the handler below.
            print(f"No stocks found for '{company_name}'.")
            return {"stock_code": company_name}

        print(f"Found {len(search_results)} results for {company_name}:")
        stock_code = search_results[0]['symbol']  # first equity match
        print(f"Found stock code for {company_name}: {stock_code}")
        return {"stock_code": stock_code}
    except Exception as e:
        print(f"Error fetching stock code for {company_name}: {e}")
        # If lookup fails, we can fall back to using the company name for news search
        return {"stock_code": company_name}

In [None]:
company_name = "Google"


# NOTE(review): this cell re-defines `find_stocks`, which the "Define the Nodes of the
# Graph" cell already defines; whichever cell ran last wins. Keep a single definition
# once the two cells are consolidated.
def find_stocks(search_query):
    """Search Yahoo Finance and keep only the equity matches.

    Args:
        search_query: Free-text company name or ticker passed to ``yf.Search``.

    Returns:
        list: The quote dicts from ``yf.Search(search_query).quotes`` whose
        ``quoteType`` is ``'EQUITY'``, in the order the search returned them.
        An empty list when there are no quotes or when the search raises.
    """
    try:
        search_results = yf.Search(search_query)  # performing search
        # `.get` rather than `quote['quoteType']`: one quote without that key used to
        # raise KeyError, which the handler below turned into an empty result and
        # thereby threw away every valid match as well.
        return [
            quote
            for quote in (search_results.quotes or [])
            if quote.get('quoteType') == 'EQUITY'
        ]
    except Exception as e:
        # Best-effort lookup: report the failure and let the caller apply its fallback.
        print(f"Error during search: {e}")
        return []


# Quick manual check: list the equity matches Yahoo Finance returns for the company name.
search_term = company_name
equity_matches = find_stocks(search_term)  # calling search and filter function
# The results are not filtered by country, so the old "Indian stocks" label was wrong.
# The old variable name is kept as an alias in case another cell still refers to it.
indian_stocks = equity_matches
if equity_matches:
    print(f"Found {len(equity_matches)} equity matches for '{search_term}':")
    for stock in equity_matches:
        # 'longname' may be missing from a quote; fall back instead of raising KeyError.
        print(
            f"Name: {stock.get('longname') or stock.get('shortname') or 'N/A'}, "
            f"Ticker: {stock.get('symbol')}, QuoteType: {stock['quoteType']}"
        )
else:
    print(f"No equity matches found for '{search_term}'.")


Found 1 Indian stocks:
Name: Alphabet Inc., Ticker: GOOG, QuoteType: EQUITY


In [None]:
def fetch_news(state: GraphState) -> GraphState:
    """
    Node 2: Fetches recent news for the company using its stock code.
    It uses the YahooFinanceNewsTool for fetching news.

    Args:
        state: Graph state; only ``stock_code`` is read.

    Returns:
        A partial state update ``{"news_articles": ...}`` containing the text
        returned by the tool, or "Could not fetch news." when the tool raised.
    """
    print("---Fetching Company News---")
    stock_code = state['stock_code']

    # Using YahooFinanceNewsTool to fetch news.
    news_tool = YahooFinanceNewsTool()

    try:
        # The YahooFinanceNewsTool takes the stock ticker as input
        news = news_tool.run(stock_code)
    except Exception as e:
        print(f"Error fetching news for {stock_code}: {e}")
        return {"news_articles": "Could not fetch news."}

    # The tool reports an empty result as ordinary text ("No news found for company that
    # searched with <ticker> ticker.") rather than raising, so do not log that as success.
    # The returned text is passed on unchanged either way.
    if not news or str(news).startswith("No news found"):
        print(f"No news available for {stock_code}.")
    else:
        print(f"Successfully fetched news for {stock_code}.")
    return {"news_articles": news}

In [None]:
def analyze_sentiment(state: GraphState) -> GraphState:
    """
    Node 3: Builds the structured sentiment profile from the fetched news.

    It calls the Azure OpenAI model and forces it to return a JSON object
    matching the SentimentProfile Pydantic schema. The prompt text is loaded
    from the MLflow prompt registry, or registered there if loading fails.

    Args:
        state: Graph state; ``company_name``, ``stock_code`` and
            ``news_articles`` are read.

    Returns:
        A partial state update ``{"sentiment_profile": ...}`` holding the profile
        as a plain dict (``model_dump()``), or None when the LLM call failed.
    """
    print("---Analyzing Sentiment with LLM---")
    company_name = state['company_name']
    stock_code = state['stock_code']
    news_articles = state['news_articles']

    print(f"State Variables: {company_name}, {stock_code}, {news_articles}")

    # Define the prompt template.
    # MLflow prompt templates mark variables with DOUBLE curly braces; single-brace
    # placeholders are not substituted by `prompt.format(...)` and would be sent to
    # the model verbatim.
    prompt_template = """
    You are an expert financial analyst. Your task is to generate a structured sentiment profile
    for the company '{{company_name}}' ({{stock_code}}) based on the following news articles.

    Analyze the provided news content and generate a JSON object with the specified fields.

    News Articles:
    "{{news_articles}}"

    Please provide your analysis in a structured JSON format.
    """

    # Register or load the prompt using MLflow
    # NOTE: a version that is already registered is loaded as-is. If an earlier version
    # was registered with single-brace placeholders, register a new version so that
    # this template is the one being loaded.
    prompt_name = "bhanu-sentiment-analysis-prompt"
    try:
        # Try to load the latest version of the prompt
        prompt = mlflow.genai.load_prompt(f"prompts:/{prompt_name}/latest")
        print(f"Loaded existing prompt '{prompt.name}' (version {prompt.version})")
    except Exception:
        # If the prompt doesn't exist, register it
        print(f"Prompt '{prompt_name}' not found. Registering a new one.")
        prompt = mlflow.genai.register_prompt(
            name=prompt_name,
            template=prompt_template,
            description="Prompt for analyzing market sentiment of a company based on news.",
            tags={
                "task": "sentiment analysis",
                "domain": "finance",
                "model_output": "json",
            },
        )
        print(f"Registered new prompt '{prompt.name}' (version {prompt.version})")


    # Initialize the Azure Chat Model (all connection settings come from the environment)
    llm = AzureChatOpenAI(
        azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
        openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
        openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        openai_api_type=os.getenv("OPENAI_API_TYPE"),
        temperature=0,
        # streaming=False,
    )

    # Constrain the model's reply to the SentimentProfile schema.
    structured_llm = llm.with_structured_output(SentimentProfile)

    try:
        # Invoke the model with the prompt and input variables
        profile = structured_llm.invoke(prompt.format(company_name=company_name, stock_code=stock_code, news_articles=news_articles))
        print("Successfully analyzed sentiment.")
        # Store a plain dict so the result can be serialized with json later on.
        return {"sentiment_profile": profile.model_dump()}
    except Exception as e:
        print(f"Error during sentiment analysis: {e}")
        return {"sentiment_profile": None} # Handle potential errors gracefully

In [None]:
# --- 4. Build the Graph ---
# We define the workflow and connect the nodes in a specific sequence.

# Create a new graph
# The workflow is a strictly linear pipeline:
# START -> get_stock_code -> fetch_news -> analyze_sentiment -> END
workflow = StateGraph(GraphState)

# Register every node under the name the edges refer to.
for node_name, node_fn in (
    ("get_stock_code", get_stock_code),
    ("fetch_news", fetch_news),
    ("analyze_sentiment", analyze_sentiment),
):
    workflow.add_node(node_name, node_fn)

# Connect each stage to the one that follows it.
pipeline = (START, "get_stock_code", "fetch_news", "analyze_sentiment", END)
for upstream, downstream in zip(pipeline, pipeline[1:]):
    workflow.add_edge(upstream, downstream)

# Turn the graph definition into an invocable application.
app = workflow.compile()


In [None]:

company_to_analyze = "Google"
inputs = dict(company_name=company_to_analyze)

# Track the run under a dedicated experiment and let MLflow trace the LangChain calls.
mlflow.set_experiment("Market Sentiment Analysis")
mlflow.langchain.autolog()

with mlflow.start_run(run_name=f"Sentiment Analysis for {company_to_analyze}") as run:
    print(f"🚀 Starting sentiment analysis for: {company_to_analyze}")

    # Run the whole graph; only the company name is provided up front.
    final_state = app.invoke(inputs)
    print(final_state)

    result_json = final_state['sentiment_profile']
    print("\n--- Final Sentiment Profile ---")
    print(result_json)

    # Persist the profile to disk, then attach that file to the MLflow run.
    with open("sentiment_profile.json", "w") as f:
        json.dump(result_json, f, indent=2)
    mlflow.log_artifact("sentiment_profile.json")

print(f"\n✅ Analysis complete for {company_to_analyze}.")
print("📈 Run `mlflow ui` in your terminal to view the trace.")

🚀 Starting sentiment analysis for: Google
---Fetching Stock Code---
Found 1 results for Google:
Found stock code for Google: GOOG
---Fetching Company News---
Successfully fetched news for GOOG.
---Analyzing Sentiment with LLM---
State Variables: Google, GOOG, No news found for company that searched with GOOG ticker.
Successfully analyzed sentiment.
{'company_name': 'Google', 'stock_code': 'GOOG', 'news_articles': 'No news found for company that searched with GOOG ticker.', 'sentiment_profile': {'company_name': 'Google', 'stock_code': 'GOOG', 'news_summary': 'No news found for company that searched with GOOG ticker.', 'sentiment': 'Neutral', 'people_names': [], 'places_names': [], 'other_companies_referred': [], 'related_industries': [], 'market_implications': 'The lack of news suggests stability, but it may also indicate a lack of significant developments that could impact investor sentiment.', 'confidence_score': 0.5}}

--- Final Sentiment Profile ---
{'company_name': 'Google', 'stock