# Real-Time Market Sentiment Analyzer
This notebook implements a LangChain-powered pipeline to analyze real-time market sentiment for a given company.

In [1]:
# Install necessary packages for LangChain and Google Gemini-2.0-flash
!python -m pip install langchain langchain-core langchain-community langchain-experimental --quiet
!python -m pip install -U langchain-google-genai --quiet
!python -m pip install mlflow --quiet
!python -m pip install yahooquery --quiet
!python -m pip install mlflow --quiet

In [1]:
# Load settings from a local .env file into the process environment before any client is created.
# override=True lets values from .env replace variables that are already set.
# NOTE(review): presumably this supplies the Google GenAI API key used later — confirm.
from dotenv import load_dotenv
load_dotenv(override=True)

True

## Step 1: Accept Company Name Input

In [2]:
def get_input():
    """Prompt the user for a company name and return it.

    Leading and trailing whitespace is stripped so that a stray space or newline
    does not defeat exact-name lookups performed later in the pipeline.

    Returns:
        str: The company name as typed, without surrounding whitespace
        (an empty string if the user entered nothing).
    """
    return input("Enter Company Name (e.g., 'Google Inc'): ").strip()

## Step 2: Stock Symbol Lookup

In [3]:
from yahooquery import search

def get_stock_symbol(company_name: str) -> str:
    """Resolve a company name to a stock ticker symbol.

    A live lookup through ``search`` is tried first and the first quote that
    carries a non-empty ``symbol`` is returned. If the lookup fails or yields no
    usable quote, a small built-in mapping is consulted (case-insensitive,
    ignoring surrounding whitespace).

    Args:
        company_name: Human-readable company name, e.g. ``"Google Inc"``.

    Returns:
        The ticker symbol, or the sentinel string ``"UNKNOWN"`` when neither the
        live lookup nor the static mapping can resolve the name.
    """
    # Attempt a live search
    try:
        result = search(company_name)
        quotes = result.get("quotes") if isinstance(result, dict) else None
        for quote in quotes or []:
            symbol = quote.get("symbol") if isinstance(quote, dict) else None
            if symbol:
                return symbol
        # An empty result is reported explicitly instead of surfacing as an IndexError.
        print(f"No stock symbol found for {company_name!r}; trying static fallback")
    except Exception as e:
        # Deliberately broad: the lookup is best-effort and any failure falls through
        # to the static mapping below.
        print(f"Error looking up stock symbol: {e}")

    # Fallback
    static_mapping = {
        "Apple Inc": "AAPL",
        "Google Inc": "GOOG",
        "Microsoft Corporation": "MSFT"
    }
    normalized = company_name.strip().casefold() if isinstance(company_name, str) else ""
    for known_name, symbol in static_mapping.items():
        if known_name.casefold() == normalized:
            return symbol
    return "UNKNOWN"

## Step 3: Fetch Company News

In [4]:
from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool

# Initialize the Yahoo Finance News Tool once; get_company_news() reuses this instance
news_tool = YahooFinanceNewsTool()

def get_company_news(stock_code: str) -> str:
    """Return whatever the shared ``news_tool`` produces for ``stock_code``.

    The ticker is passed to the tool unchanged; no lookup or validation is done here.
    """
    headlines = news_tool.run(stock_code)
    return headlines

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
# Smoke test: fetch news for a known ticker to confirm the news tool is wired up.
# The output comes from the live tool, so it will differ between runs.
get_company_news("AAPL")

'Apple (AAPL) iPhone 17 Launch Spurs Buy Rating, $275 Price Target\nApple Inc. (NASDAQ:AAPL) is one of the Trending AI Stocks on Wall Street. On September 10, TD Cowen analyst Krish Sankar reiterated a Buy rating and $275.00 price target on the stock following Apple’s iPhone 17 launch event. According to the analyst, core iPhone updates were expected but there were pleasant surprises as well. These include […]\n\nTim Cook Says Apple Will Invest $600 Billion In US Manufacturing, Creating A \'Domino Effect\' As iPhone 17 Pre-Orders Show Strong Momentum\nApple Inc. (NASDAQ:AAPL) CEO Tim Cook said the company\'s record $600 billion investment in U.S. manufacturing over the next four years will benefit 79 factories nationwide and spark a "domino effect" of growth, as early data points to strong iPhone 17 ...'

In [6]:
from langchain.tools import tool

# LangChain tool (not a chain): resolves the ticker for a company name and bundles it
# with the news fetched for that ticker. The docstring is left verbatim because the
# @tool decorator uses it as the tool's description.
@tool
def get_stock_info(company_name: str) -> dict:
    """
    Given a company name, returns its stock code and latest news summary.
    """
    ticker = get_stock_symbol(company_name)
    headlines = get_company_news(ticker)
    return dict(
        company_name=company_name,
        stock_code=ticker,
        news_summary=headlines,
    )

## Step 4: Sentiment Analysis Prompt with Google Gemini-2.0-flash

In [12]:
from langchain_core.prompts import PromptTemplate
from langchain.chat_models import init_chat_model
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
import mlflow

# Point MLflow at a tracking server. The user may type a URI; pressing Enter (or
# entering only whitespace) selects the default below.
# NOTE(review): the default is a hard-coded host — consider moving it to configuration.
DEFAULT_TRACKING_URI = "http://20.75.92.162:5000/"

# input() always returns a string, so an emptiness check after strip() is sufficient.
tracking_uri = input("Enter mlflow tracking uri: ").strip()
if not tracking_uri:
    tracking_uri = DEFAULT_TRACKING_URI  # fallback
mlflow.set_tracking_uri(tracking_uri)

# Fields the model is asked to return, as (name, description) pairs in output order.
_schema_fields = (
    ("company_name", "Name of the company"),
    ("stock_code", "Stock ticker code"),
    ("newsdesc", "Concise news summary"),
    ("sentiment", "Positive/Negative/Neutral"),
    ("people_names", "List of person names"),
    ("places_names", "List of place names"),
    ("other_companies_referred", "List of other companies"),
    ("related_industries", "Related industries"),
    ("market_implications", "Market implications"),
    ("confidence_score", "Confidence in the result"),
)

# One ResponseSchema per field; the parser built from them supplies the format
# instructions for the prompt and parses the model's reply.
response_schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in _schema_fields
]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Prompt: the caller supplies company_name, stock_code and news_summary at invoke time;
# output_format is pre-filled once from the parser's format instructions.
_sentiment_prompt_text = """
Given the following news summary: "{news_summary}", analyze the market sentiment for the company "{company_name}" (Stock Code: {stock_code}).
Return the structured JSON according to the following format:
{output_format}
"""
_format_instructions = output_parser.get_format_instructions()

prompt_template = PromptTemplate(
    template=_sentiment_prompt_text,
    input_variables=["company_name", "stock_code", "news_summary"],
    partial_variables={"output_format": _format_instructions},
)

# Group this notebook's runs under one MLflow experiment
mlflow.set_experiment("Market Sentiment Analysis Piyush")

# Register the prompt in the MLflow GenAI prompt registry.
# register_prompt expects the template *text* (not a LangChain PromptTemplate object),
# with variables written in MLflow's double-brace form ({{variable}}). The LangChain
# text uses single braces, so it is converted here. The plain replace() is only valid
# because this template contains no literal or escaped braces.
# NOTE: every execution of this cell registers a new version of the prompt.
mlflow_template_text = prompt_template.template.replace("{", "{{").replace("}", "}}")
mlflow.genai.register_prompt(
    name="market-sentiment-analysis-prompt",
    template=mlflow_template_text,
    commit_message="Initial commit of market sentiment analysis prompt",
    tags={
        "author": "Piyush Rai",
        "task": "market_sentiment_analysis",
        "language": "en",
        "llm": "gemini-2.0-flash"
    },
)


# Alternative (kept disabled): reuse an already-registered prompt version instead of
# the inline template. to_single_brace_format() converts MLflow's {{var}} syntax back
# to the {var} syntax that LangChain's PromptTemplate expects.
# # Load the prompt by specifying the version
# registered_prompt = mlflow.genai.load_prompt(
#     "prompts:/market-sentiment-analysis-prompt/1"
# )

# # Create the final prompt template using the loaded prompt template string
# prompt_template = PromptTemplate(
#     input_variables=["company_name", "stock_code", "news_summary"],
#     template=registered_prompt.to_single_brace_format(),
#     partial_variables={"output_format": output_parser.get_format_instructions()}
# )

# Initialize the Google Gemini chat model through LangChain's provider-agnostic factory
model_name = 'gemini-2.0-flash'
llm_model = init_chat_model(model_name, model_provider="google_genai")

# Assemble the chain: fill the prompt -> call the model -> parse the reply into a dict
sentiment_analysis_chain = prompt_template | llm_model | output_parser


2025/09/17 16:07:58 INFO mlflow.tracking.fluent: Experiment with name 'Market Sentiment Analysis Piyush' does not exist. Creating a new experiment.
2025/09/17 16:08:02 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for prompt version to finish creation. Prompt name: market-sentiment-analysis-prompt, version 3


## Step 5: Run the Pipeline with MLflow Tracking

In [24]:
import time 
import json

def _timed(func, *args):
    """Call ``func(*args)`` and return ``(result, elapsed_seconds)``.

    ``time.perf_counter`` is used because it is monotonic; ``time.time`` can jump
    when the system clock is adjusted.
    """
    started = time.perf_counter()
    result = func(*args)
    return result, time.perf_counter() - started


def run_pipeline_with_mlflow(company_name: str):
    """Run the sentiment pipeline for one company inside a single MLflow run.

    Steps: resolve the ticker, fetch news, render and log the prompt, invoke the
    sentiment chain. Each step's duration is logged as a metric, and the inputs
    and outputs are logged as params and text artifacts.

    Args:
        company_name: Company to analyze, e.g. ``"Google Inc"``.

    Returns:
        The parsed output of ``sentiment_analysis_chain`` for this company.

    Raises:
        ValueError: If ``company_name`` is not a non-blank string. This is checked
            before the MLflow run starts, so no empty run is recorded.
    """
    if not isinstance(company_name, str) or not company_name.strip():
        raise ValueError("company_name must be a non-empty string")

    run_name = f"Sentiment_Analysis_{company_name.replace(' ', '_')}"
    with mlflow.start_run(run_name=run_name):
        mlflow.log_param("input_company_name", company_name)

        # Stock code extraction span
        stock_code, stock_code_duration = _timed(get_stock_symbol, company_name)
        mlflow.log_param("stock_code", stock_code)
        mlflow.log_metric("stock_code_extraction_duration_sec", stock_code_duration)

        # News fetching span
        news_summary, news_fetching_duration = _timed(get_company_news, stock_code)
        mlflow.log_text(news_summary, "fetched_news.txt")
        mlflow.log_metric("news_fetching_duration_sec", news_fetching_duration)

        info_dict = {
            "company_name": company_name,
            "stock_code": stock_code,
            "news_summary": news_summary
        }

        # Log the rendered prompt. It is formatted from the same inputs the chain
        # receives; output_format is already a partial variable of the template,
        # so it is not passed again.
        prompt_str = prompt_template.format(**info_dict)
        mlflow.log_text(prompt_str, "sent_prompt.txt")

        # Sentiment parsing span (prompt formatting + model call + output parsing)
        final_output, sentiment_parsing_duration = _timed(
            sentiment_analysis_chain.invoke, info_dict
        )
        mlflow.log_text(json.dumps(final_output, indent=2), "sentiment_analysis_output.json")
        mlflow.log_metric("sentiment_parsing_duration_sec", sentiment_parsing_duration)

        # Total is the sum of the three measured spans (MLflow logging time is excluded)
        total_duration = stock_code_duration + news_fetching_duration + sentiment_parsing_duration
        mlflow.log_metric("total_pipeline_duration_sec", total_duration)

        return final_output

In [None]:
# Ask for a company, run the tracked pipeline, and pretty-print the structured result
company = get_input()
result = run_pipeline_with_mlflow(company)
print(json.dumps(result, indent=2))