In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import os
import time
import concurrent.futures
from typing import Dict, Any
import logging
from threading import Lock

# Configure logging once for the notebook: INFO level and above, with a
# "timestamp - level - message" line format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger; the helper functions below report warnings and errors through it.
logger = logging.getLogger(__name__)

# Lock intended to serialise file writes across threads.
# NOTE(review): no use of file_lock is visible in this part of the notebook — confirm it is still needed.
file_lock = Lock()

In [None]:
import glob
import gradio as gr
import google.generativeai as genai
from dotenv import load_dotenv
import re

In [None]:
# imports for langchain and Chroma and plotly

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document, BaseRetriever
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate

# from langchain_huggingface import HuggingFaceEmbeddings

from langchain.vectorstores import Chroma

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import ChatPromptTemplate

import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from typing import List
from pydantic import Field

In [None]:
# constants
# Analysis cut-off date as a 'YYYY-MM-DD' string.
# NOTE(review): presumably passed as the end_date argument of the helpers below — confirm against callers.
end_date = "2025-04-12"

In [None]:
def calculate_rsi(data, window=14):
    """Compute the Relative Strength Index (RSI) with Wilder's smoothing.

    Args:
        data (pd.DataFrame): Price data with a 'Close' column. When the columns
            are a MultiIndex, the first column under 'Close' is used.
        window (int): Number of price changes averaged to seed the smoothing
            (default 14).

    Returns:
        pd.Series: RSI values indexed like the close prices. The first
        ``window`` entries are NaN; when the data has ``window`` rows or fewer
        every entry is NaN (there is not enough history to seed the average).
    """
    # Ensure the 'Close' column is correctly accessed
    if isinstance(data.columns, pd.MultiIndex):
        close_prices = data['Close'].iloc[:, 0]
    else:
        close_prices = data['Close']

    n_rows = len(close_prices)

    # Calculate price changes
    delta = close_prices.diff()

    # Separate gains (positive) and losses (negative, sign-flipped). Plain
    # arrays are used so that every lookup below is positional, whatever the
    # index type of the price series is (dates, integers, ...).
    gains = delta.where(delta > 0, 0).to_numpy(dtype=float)
    losses = (-delta.where(delta < 0, 0)).to_numpy(dtype=float)

    avg_gains = np.full(n_rows, np.nan)
    avg_losses = np.full(n_rows, np.nan)

    if n_rows > window:
        # Seed: simple mean of the first `window` changes (position 0 has no change).
        avg_gains[window] = gains[1:window + 1].mean()
        avg_losses[window] = losses[1:window + 1].mean()

        # Subsequent values use Wilder's smoothing of the previous average.
        for i in range(window + 1, n_rows):
            avg_gains[i] = (avg_gains[i - 1] * (window - 1) + gains[i]) / window
            avg_losses[i] = (avg_losses[i - 1] * (window - 1) + losses[i]) / window

    # Convert to Series with proper index
    avg_gains = pd.Series(avg_gains, index=close_prices.index)
    avg_losses = pd.Series(avg_losses, index=close_prices.index)

    # Calculate RS and RSI (a zero average loss gives an infinite RS, i.e. RSI = 100)
    rs = avg_gains / avg_losses
    rsi = 100 - (100 / (1 + rs))

    return rsi

In [None]:
def _period_to_offset(period):
    """Convert a period string such as '5d', '2wk', '6mo' or '1y' to a pd.DateOffset.

    Returns None when the string is not a positive count followed by one of
    the suffixes d / wk / mo / y (for example 'max' or 'ytd').
    """
    unit_keywords = (('mo', 'months'), ('wk', 'weeks'), ('y', 'years'), ('d', 'days'))
    text = str(period).strip().lower()
    for suffix, keyword in unit_keywords:
        if text.endswith(suffix):
            count = text[:-len(suffix)]
            if count.isdigit() and int(count) > 0:
                return pd.DateOffset(**{keyword: int(count)})
            return None
    return None


def get_stock_data(symbol, period='1y', end_date=None):
    """Fetch stock data with proper error handling and ensure unique data per stock

    The symbol is downloaded with a ".NS" suffix. When ``end_date`` is given,
    the start of the range is derived from ``period`` and the download is
    requested by explicit start/end dates.

    Args:
        symbol (str): Stock symbol (without the ".NS" suffix)
        period (str): Data period to fetch (default='1y')
        end_date (str): End date in 'YYYY-MM-DD' format. If None, uses current date

    Returns:
        tuple: (DataFrame with stock data, actual_end_date). On any failure, or
        when no data or no 'Close' column is available, returns
        (empty DataFrame, None).
    """
    try:
        time.sleep(1)  # Add delay to prevent rate limiting

        ticker = f"{symbol}.NS"
        kwargs = {
            'tickers': ticker,
            'progress': False,
            'threads': False,
            'ignore_tz': True,
            'auto_adjust': True,
            'prepost': False,
            'repair': True
        }

        if end_date:
            kwargs['end'] = end_date
            offset = _period_to_offset(period)
            if offset is not None:
                # start/end fully determine the range, so `period` is not sent too.
                # NOTE(review): recent yfinance releases reportedly reject
                # period + start + end together — confirm against the installed version.
                start_date = pd.to_datetime(end_date) - offset
                kwargs['start'] = start_date.strftime('%Y-%m-%d')
            else:
                # Periods like 'max' / 'ytd' have no fixed length; let yfinance resolve them.
                logger.warning(f"Cannot derive a start date from period '{period}' for {symbol}; passing period as-is")
                kwargs['period'] = period
        else:
            kwargs['period'] = period

        # Download data
        data = yf.download(**kwargs)

        if data.empty:
            logger.warning(f"No data available for {symbol}")
            return pd.DataFrame(), None

        # Get the actual last date from the data
        actual_end_date = data.index[-1]

        # Flatten (field, ticker) multi-index columns into plain field columns
        if isinstance(data.columns, pd.MultiIndex):
            df = pd.DataFrame(index=data.index)
            for col in ('Open', 'High', 'Low', 'Close', 'Volume'):
                if (col, ticker) in data.columns:
                    df[col] = data[(col, ticker)]
                else:
                    # Fall back to whatever sits under the field name; a missing
                    # field raises KeyError, which the outer handler reports.
                    selected = data[col]
                    if isinstance(selected, pd.DataFrame):
                        selected = selected.iloc[:, 0]
                    df[col] = selected
        else:
            df = data.copy()

        # Verify data is valid
        if 'Close' in df.columns:
            latest_price = df['Close'].iloc[-1]
            logger.info(f"Verified unique data for {symbol}: Latest price = Rs.{latest_price:.2f}")
            if end_date:
                requested_date = pd.to_datetime(end_date).normalize()
                actual_date = actual_end_date.normalize()
                if actual_date != requested_date:
                    logger.info(f"Note: Last available date ({actual_date.strftime('%Y-%m-%d')}) differs from requested date ({requested_date.strftime('%Y-%m-%d')})")
        else:
            logger.error(f"Missing Close column for {symbol}")
            return pd.DataFrame(), None

        return df, actual_end_date

    except Exception as e:
        logger.error(f"Error fetching data for {symbol}: {e}")
        return pd.DataFrame(), None

In [None]:
def calculate_macd(data, fast=12, slow=26, signal=9):
    """Compute the MACD line, its signal line and the histogram.

    Args:
        data (pd.DataFrame): Price data with a 'Close' column (first column
            under 'Close' when the columns are a MultiIndex).
        fast (int): Span of the fast EMA.
        slow (int): Span of the slow EMA.
        signal (int): Span of the EMA applied to the MACD line.

    Returns:
        tuple: (macd_line, signal_line, histogram) as pd.Series. On any error
        the problem is logged and three empty float Series are returned.
    """
    try:
        has_multi_columns = isinstance(data.columns, pd.MultiIndex)
        # Work on a copy so the caller's frame is never touched
        if has_multi_columns:
            closes = data['Close'].iloc[:, 0].copy()
        else:
            closes = data['Close'].copy()

        def _ema(series, span):
            # Recursive (adjust=False) exponential moving average
            return series.ewm(span=span, adjust=False).mean()

        macd_line = _ema(closes, fast) - _ema(closes, slow)
        signal_line = _ema(macd_line, signal)

        return macd_line, signal_line, macd_line - signal_line
    except Exception as e:
        logger.error(f"Error calculating MACD: {e}")
        empty_series = pd.Series(dtype=float)
        return empty_series, empty_series, empty_series

In [None]:
def calculate_momentum(data, period=20):
    """Percentage price change over ``period`` rows.

    Args:
        data (pd.DataFrame): Price data with a 'Close' column (first column
            under 'Close' when the columns are a MultiIndex).
        period (int): Number of rows to look back.

    Returns:
        pd.Series: ``(close / close shifted by period - 1) * 100``; the first
        ``period`` entries are NaN. On any error the problem is logged and an
        empty float Series is returned.
    """
    try:
        has_multi_columns = isinstance(data.columns, pd.MultiIndex)
        prices = data['Close'].iloc[:, 0] if has_multi_columns else data['Close']

        lagged_prices = prices.shift(period)
        relative_change = prices / lagged_prices - 1
        return relative_change * 100
    except Exception as e:
        logger.error(f"Error calculating momentum: {e}")
        return pd.Series(dtype=float)

In [None]:
def calculate_momentum_index(data, period=126):
    """Rolling standard deviation of daily returns, scaled by sqrt(252).

    Args:
        data (pd.DataFrame): Price data with a 'Close' column (first column
            under 'Close' when the columns are a MultiIndex).
        period (int): Rolling window length, in rows of returns.

    Returns:
        pd.Series: Rolling std of the percentage changes times ``sqrt(252)``.
        The series is one row shorter than the prices because the first
        (undefined) return is dropped. On any error the problem is logged and
        an empty float Series is returned.
    """
    try:
        has_multi_columns = isinstance(data.columns, pd.MultiIndex)
        prices = data['Close'].iloc[:, 0] if has_multi_columns else data['Close']

        daily_returns = prices.pct_change().dropna()
        rolling_std = daily_returns.rolling(window=period).std()
        return rolling_std * np.sqrt(252)
    except Exception as e:
        logger.error(f"Error calculating momentum index: {e}")
        return pd.Series(dtype=float)

In [None]:
def get_fundamental_data(symbol: str) -> Dict[str, Any]:
    """Look up fundamental figures for ``symbol`` (queried with a ".NS" suffix).

    Returns:
        dict: Display label -> value taken from the ticker's ``info`` mapping,
        with 'N/A' for every field the mapping does not contain. An empty dict
        is returned (and the error logged) when the lookup fails.
    """
    # (display label, key in ticker.info), in output order
    label_to_info_key = (
        ('P/E Ratio', 'trailingPE'),
        ('Forward P/E', 'forwardPE'),
        ('Market Cap', 'marketCap'),
        ('EPS', 'trailingEps'),
        ('Dividend Yield', 'dividendYield'),
        ('Debt to Equity', 'debtToEquity'),
        ('Return on Equity', 'returnOnEquity'),
        ('Revenue Growth', 'revenueGrowth'),
        ('Profit Margins', 'profitMargins'),
        ('Beta', 'beta'),
        ('Current Ratio', 'currentRatio'),
        ('Book Value', 'bookValue'),
        ('52-Week High', 'fiftyTwoWeekHigh'),
        ('52-Week Low', 'fiftyTwoWeekLow'),
        ('Target Price', 'targetMeanPrice'),
    )
    try:
        info = yf.Ticker(f"{symbol}.NS").info
        return {label: info.get(info_key, 'N/A') for label, info_key in label_to_info_key}
    except Exception as e:
        logger.error(f"Error fetching fundamental data for {symbol}: {e}")
        return {}

In [None]:
def _first_scalar(value):
    """Collapse a Series to its first element; pass scalars through unchanged."""
    if isinstance(value, pd.Series):
        return value.iloc[0]
    return value


def determine_strength(data, rsi, macd_line, signal_line):
    """Summarise technical strengths and weaknesses of a stock.

    Args:
        data (pd.DataFrame): Price data with 'Close' and 'Volume' columns. With
            MultiIndex columns, the first column under each of Open, High, Low,
            Close and Volume is used.
        rsi (pd.Series): RSI values; only the last one is read.
        macd_line (pd.Series): MACD line; only the last value is read.
        signal_line (pd.Series): MACD signal line; only the last value is read.

    Returns:
        tuple: (strengths, weaknesses), two lists of human-readable messages.
        Both are empty when ``data`` is an empty DataFrame.
    """
    # Ensure data is not empty
    if isinstance(data, pd.DataFrame) and data.empty:
        return [], []

    # Handle multi-index columns if they exist
    if isinstance(data.columns, pd.MultiIndex):
        data_df = pd.DataFrame(
            {col: data[col].iloc[:, 0] for col in ('Open', 'High', 'Low', 'Close', 'Volume')},
            index=data.index,
        )
    else:
        data_df = data

    close = data_df['Close']
    volume = data_df['Volume']

    current_price = _first_scalar(close.iloc[-1])
    sma_50 = _first_scalar(close.rolling(window=50).mean().iloc[-1])
    sma_200 = _first_scalar(close.rolling(window=200).mean().iloc[-1])

    strengths = []
    weaknesses = []

    # RSI Analysis (a NaN RSI matches no branch and adds nothing)
    if not isinstance(rsi, pd.Series) or len(rsi) == 0:
        logger.warning("RSI data is empty")
    else:
        last_rsi = _first_scalar(rsi.iloc[-1])
        if last_rsi > 70:
            weaknesses.append("RSI indicates overbought conditions")
        elif last_rsi < 30:
            strengths.append("RSI indicates oversold conditions (potential buying opportunity)")
        elif 40 <= last_rsi <= 60:
            strengths.append("RSI in neutral zone showing balance between buyers and sellers")
        elif 60 < last_rsi < 70:
            strengths.append("Strong RSI showing positive momentum")

    # MACD Analysis
    if (not isinstance(macd_line, pd.Series) or len(macd_line) == 0 or
        not isinstance(signal_line, pd.Series) or len(signal_line) == 0):
        logger.warning("MACD or signal line data is empty")
    else:
        macd_value = _first_scalar(macd_line.iloc[-1])
        signal_value = _first_scalar(signal_line.iloc[-1])
        # Only report a direction when both values exist and actually differ;
        # a NaN or equal pair is neither bullish nor bearish.
        if pd.notna(macd_value) and pd.notna(signal_value):
            if macd_value > signal_value:
                strengths.append("MACD line above signal line indicating bullish momentum")
            elif macd_value < signal_value:
                weaknesses.append("MACD line below signal line indicating bearish momentum")

    # Moving Average Analysis
    if pd.notna(sma_50) and pd.notna(current_price):
        if current_price > sma_50:
            strengths.append("Price above 50-day SMA showing short-term strength")
        else:
            weaknesses.append("Price below 50-day SMA showing short-term weakness")

    if pd.notna(sma_200) and pd.notna(current_price):
        if current_price > sma_200:
            strengths.append("Price above 200-day SMA suggesting long-term uptrend")
        else:
            weaknesses.append("Price below 200-day SMA suggesting long-term downtrend")

    # Golden/Death Cross: the two averages are within 3% of each other
    if pd.notna(sma_50) and pd.notna(sma_200):
        if sma_50 > sma_200 and (sma_50 / sma_200 - 1) < 0.03:
            strengths.append("Recent golden cross or nearing golden cross (50-day SMA crossing above 200-day SMA)")
        elif sma_50 < sma_200 and (sma_200 / sma_50 - 1) < 0.03:
            weaknesses.append("Recent death cross or nearing death cross (50-day SMA crossing below 200-day SMA)")

    # Volume Analysis: last five rows against the whole-period average
    avg_volume = _first_scalar(volume.mean())
    recent_volume = _first_scalar(volume.iloc[-5:].mean())

    # Price trend: latest close against the mean of the last five closes
    recent_price_mean = _first_scalar(close.iloc[-5:].mean())
    price_trend_up = current_price > recent_price_mean

    if recent_volume > avg_volume * 1.2:
        if price_trend_up:
            strengths.append("Strong volume supporting upward price movement")
        else:
            weaknesses.append("High volume during price decline indicates selling pressure")
    elif recent_volume < avg_volume * 0.8:
        if price_trend_up:
            # A rally without volume is a warning sign, so it belongs with the weaknesses.
            weaknesses.append("Price rising on low volume - potential weakness")
        else:
            # A decline without volume hints at a possible reversal, so it counts as a strength.
            strengths.append("Price declining on low volume - potential for reversal")

    # Price Movement over the last 22 rows
    if len(close) >= 22:
        latest_price = _first_scalar(close.iloc[-1])
        price_22_days_ago = _first_scalar(close.iloc[-22])

        monthly_return = (latest_price / price_22_days_ago - 1) * 100

        if monthly_return > 5:
            strengths.append(f"Strong monthly return of {monthly_return:.2f}%")
        elif monthly_return < -5:
            weaknesses.append(f"Weak monthly return of {monthly_return:.2f}%")

    # Volatility: std of daily returns scaled by sqrt(252), in percent
    if len(close) >= 2:
        returns = close.pct_change().dropna()
        if len(returns) > 0:
            volatility = _first_scalar(returns.std() * np.sqrt(252) * 100)

            if volatility > 30:
                weaknesses.append(f"High volatility ({volatility:.2f}%) indicating increased risk")
            elif volatility < 15:
                strengths.append(f"Low volatility ({volatility:.2f}%) indicating stability")

    return strengths, weaknesses

In [None]:
def get_weekday(date):
    """Return the ISO weekday number of ``date`` (Monday=1 through Sunday=7)."""
    zero_based_day = date.weekday()  # Monday=0 ... Sunday=6
    return zero_based_day + 1

def get_next_friday(target_date):
    """Return the first Friday on or after ``target_date``.

    A Friday is returned unchanged; any other day is moved forward by the
    number of days left until Friday (Saturday +6, Sunday +5, Monday +4, ...).
    """
    friday = 5
    iso_day = get_weekday(target_date)
    if iso_day == friday:
        return target_date

    # Distance to Friday, wrapping around the week for Saturday and Sunday
    days_ahead = (friday - iso_day) % 7
    return target_date + pd.Timedelta(days=days_ahead)

def get_next_thursday(target_date):
    """Return the Thursday just before the Friday chosen for ``target_date``.

    The Friday is ``target_date`` itself when it is a Friday, otherwise the
    result of get_next_friday; the day before that Friday is returned. A
    Thursday therefore maps to itself and a Friday to the previous day.
    """
    one_day = pd.Timedelta(days=1)
    if get_weekday(target_date) == 5:
        anchor_friday = target_date
    else:
        anchor_friday = get_next_friday(target_date)
    return anchor_friday - one_day

def find_closest_trading_day(data, target_date):
    """Pick the date in ``data.index`` that best matches ``target_date``.

    The target is first moved to the Friday given by get_next_friday and the
    Thursday given by get_next_thursday. An exact match on that Friday wins,
    then an exact match on that Thursday; otherwise the index date with the
    smallest absolute distance to the Friday is returned. All dates are
    compared with their time component removed.
    """
    friday = pd.to_datetime(get_next_friday(target_date)).normalize()
    thursday = pd.to_datetime(get_next_thursday(target_date)).normalize()

    # Index as normalized timestamps so it compares cleanly with the targets
    trading_days = pd.to_datetime(data.index).normalize()

    # Exact hits, Friday taking precedence over Thursday
    for wanted_day in (friday, thursday):
        exact_hits = trading_days[trading_days == wanted_day]
        if not exact_hits.empty:
            return exact_hits[0]

    def _distance_to_friday(day):
        return abs(day - friday)

    # No exact hit: nearest available date to the Friday
    return min(trading_days, key=_distance_to_friday)

def get_date_one_year_back(data, from_date=None):
    """Trading day in ``data`` closest to 12 months before the reference date.

    The reference date is ``from_date`` when given, otherwise the last date in
    ``data.index``; the match is chosen by find_closest_trading_day.
    """
    reference = from_date if from_date else data.index[-1]
    reference_day = pd.to_datetime(reference).normalize()
    twelve_months_earlier = reference_day - pd.DateOffset(months=12)
    return find_closest_trading_day(data, twelve_months_earlier)

def get_date_six_months_back(data, from_date=None):
    """Trading day in ``data`` closest to 6 months before the reference date.

    The reference date is ``from_date`` when given, otherwise the last date in
    ``data.index``; the match is chosen by find_closest_trading_day.
    """
    reference = from_date if from_date else data.index[-1]
    reference_day = pd.to_datetime(reference).normalize()
    six_months_earlier = reference_day - pd.DateOffset(months=6)
    return find_closest_trading_day(data, six_months_earlier)

def get_first_available_date(data):
    """Return the first index entry of ``data`` as a timestamp with the time removed."""
    earliest_label = data.index[0]
    earliest_timestamp = pd.to_datetime(earliest_label)
    return earliest_timestamp.normalize()

def get_prices_for_dates(data, end_date=None):
    """Collect the close price now, 6 months back and 1 year back, with percentage changes.

    The reference ("current") date is always the last date present in
    ``data``; a differing ``end_date`` is only logged.

    Args:
        data (pd.DataFrame): Stock price data with 'Close' prices
        end_date (str): End date in 'YYYY-MM-DD' format. If None, uses last date in data

    Returns:
        dict: Keys 'Current', '6_months_back' and '1_year_back'. Each holds a
        'date' string and a rounded 'price'; the two look-back entries also
        hold 'price_change_percent' and 'is_first_available' (True when the
        first date of the data was used).
    """
    actual_last_date = pd.to_datetime(data.index[-1]).normalize()
    current_date = actual_last_date

    if end_date:
        requested_date = pd.to_datetime(end_date).normalize()
        if requested_date == actual_last_date:
            current_date = requested_date
        else:
            # Requested date is not in the data: stay on the last available one
            logger.info(f"Using last available date {actual_last_date.strftime('%Y-%m-%d')} instead of requested date {requested_date.strftime('%Y-%m-%d')}")

    six_month_date = get_date_six_months_back(data, current_date)
    one_year_date = get_date_one_year_back(data, current_date)
    first_available_date = get_first_available_date(data)

    has_multi_columns = isinstance(data.columns, pd.MultiIndex)
    close_prices = data['Close'].iloc[:, 0] if has_multi_columns else data['Close']

    current_price = close_prices[current_date]

    def _price_on_or_first(wanted_date):
        # Price on the wanted date, or on the first available date if it is missing
        try:
            return close_prices[wanted_date], wanted_date
        except KeyError:
            return close_prices[first_available_date], first_available_date

    six_month_price, six_month_actual_date = _price_on_or_first(six_month_date)
    one_year_price, one_year_actual_date = _price_on_or_first(one_year_date)

    # Relative changes; a zero base price yields a change of 0
    if one_year_price != 0:
        twelve_month_change = current_price / one_year_price - 1
    else:
        twelve_month_change = 0
    if six_month_price != 0:
        six_month_change = current_price / six_month_price - 1
    else:
        six_month_change = 0

    current_entry = {
        'date': current_date.strftime('%Y-%m-%d'),
        'price': round(current_price, 2)
    }
    six_month_entry = {
        'date': six_month_actual_date.strftime('%Y-%m-%d'),
        'price': round(six_month_price, 2),
        'price_change_percent': round(six_month_change * 100, 2),
        'is_first_available': six_month_actual_date == first_available_date
    }
    one_year_entry = {
        'date': one_year_actual_date.strftime('%Y-%m-%d'),
        'price': round(one_year_price, 2),
        'price_change_percent': round(twelve_month_change * 100, 2),
        'is_first_available': one_year_actual_date == first_available_date
    }

    return {
        'Current': current_entry,
        '6_months_back': six_month_entry,
        '1_year_back': one_year_entry
    }

In [None]:
def calculate_log_returns(data, end_date=None):
    """Daily logarithmic returns, in percent, over the year ending at the reference date.

    Args:
        data (pd.DataFrame): Stock price data with 'Close' prices
        end_date (str): End date in 'YYYY-MM-DD' format. If None, uses last date in data

    Returns:
        tuple: (log_returns, price_data). ``price_data`` has the columns
        'Current_Price', 'Previous_Price', 'Price_Ratio' (rounded to 2
        decimals) and 'Log_Return_Percent' (rounded to 3 decimals);
        ``log_returns`` is its 'Log_Return_Percent' column. Rows containing
        NaN, such as the first row of the period, are dropped.
    """
    window_end = pd.to_datetime(end_date if end_date else data.index[-1]).normalize()
    window_start = get_date_one_year_back(data, end_date)

    has_multi_columns = isinstance(data.columns, pd.MultiIndex)
    close_prices = data['Close'].iloc[:, 0] if has_multi_columns else data['Close']

    # Prices between the one-year-back date and the reference date, inclusive
    in_window = (data.index >= window_start) & (data.index <= window_end)
    window_prices = close_prices[in_window]

    current = window_prices.astype(float)
    previous = window_prices.shift(1).astype(float)

    # Ratio and log return come from the unrounded prices: LN(current/previous) * 100
    ratio = current / previous
    log_return_percent = np.log(ratio) * 100

    # Rounding is applied only after every calculation is done
    price_data = pd.DataFrame({
        'Current_Price': current.round(2),
        'Previous_Price': previous.round(2),
        'Price_Ratio': ratio.round(2),
        'Log_Return_Percent': log_return_percent.round(3)
    })

    # The first row has no previous price, so it drops out here
    price_data = price_data.dropna()

    return price_data['Log_Return_Percent'], price_data

In [None]:
def calculate_log_stdev(data, end_date=None):
    """Standard deviation of the daily log returns over the last year and last six months.

    Uses the sample standard deviation (ddof=1), like Excel's STDEV(A:B)
    applied to the range of log values, and returns it divided by 100.

    Args:
        data (pd.DataFrame): Stock price data with 'Close' prices
        end_date (str): End date in 'YYYY-MM-DD' format. If None, uses last date in data

    Returns:
        tuple: (one_year_stdev, six_month_stdev) where values are divided by
        100. A value is None when its period holds fewer than two log returns,
        because the sample standard deviation is undefined there.
    """
    # Get the dates
    if end_date:
        current_date = pd.to_datetime(end_date).normalize()
    else:
        current_date = pd.to_datetime(data.index[-1]).normalize()

    six_month_date = get_date_six_months_back(data, end_date)
    one_year_date = get_date_one_year_back(data, end_date)

    # Get log returns for the entire period
    log_returns, _ = calculate_log_returns(data, end_date)

    def _sample_stdev_since(start_date):
        # Log values from start_date up to the reference date, inclusive
        in_period = (log_returns.index >= start_date) & (log_returns.index <= current_date)
        period_logs = log_returns[in_period]
        if len(period_logs) < 2:
            return None
        return period_logs.std(ddof=1) / 100

    return _sample_stdev_since(one_year_date), _sample_stdev_since(six_month_date)

In [None]:
def calculate_momentum_ratio(data, end_date=None):
    """Momentum ratios for the 1-year and 6-month periods.

    Momentum ratio = price change percentage / (standard deviation * 100).

    Args:
        data (pd.DataFrame): Stock price data with 'Close' prices
        end_date (str): End date in 'YYYY-MM-DD' format. If None, uses last date in data

    Returns:
        dict: 'momentum_ratios' holds the ratios rounded to 4 decimals and
        'components' the price change and scaled standard deviation used for
        each period. Ratios and scaled deviations are None when the standard
        deviation is missing or zero.
    """
    price_points = get_prices_for_dates(data, end_date)
    one_year_change = price_points['1_year_back']['price_change_percent']
    six_month_change = price_points['6_months_back']['price_change_percent']

    # Standard deviations arrive already divided by 100
    one_year_stdev, six_month_stdev = calculate_log_stdev(data, end_date)

    def _scaled(stdev):
        # Back on the percent scale of the price change; None for a missing or zero stdev
        return stdev * 100 if stdev else None

    def _ratio(change, stdev):
        return change / (stdev * 100) if stdev else None

    def _rounded(ratio):
        return round(ratio, 4) if ratio is not None else None

    one_year_momentum = _ratio(one_year_change, one_year_stdev)
    six_month_momentum = _ratio(six_month_change, six_month_stdev)

    return {
        'momentum_ratios': {
            'one_year': _rounded(one_year_momentum),
            'six_month': _rounded(six_month_momentum)
        },
        'components': {
            'one_year': {
                'price_change_percent': one_year_change,
                'standard_deviation': _scaled(one_year_stdev)
            },
            'six_month': {
                'price_change_percent': six_month_change,
                'standard_deviation': _scaled(six_month_stdev)
            }
        }
    }

In [None]:
def calculate_universe_stats(momentum_values):
    """Calculate standard deviation and mean for a list of momentum ratios.
    Similar to Excel's STDEV and AVERAGE functions.

    Args:
        momentum_values: List of momentum ratio values; None and NaN entries
            are ignored

    Returns:
        tuple: (standard_deviation, mean). Both are None when no valid value
        remains. The standard deviation alone is None when only one valid
        value remains, because the sample standard deviation needs two.
    """
    # Remove None and NaN values
    valid_values = [v for v in momentum_values if v is not None and not np.isnan(v)]

    if not valid_values:
        return None, None

    # Calculate mean (similar to Excel's AVERAGE)
    mean = np.mean(valid_values)

    # ddof=1 divides by n - 1, so a single value has no defined sample stdev
    if len(valid_values) < 2:
        return None, mean

    # Calculate standard deviation (similar to Excel's STDEV)
    stdev = np.std(valid_values, ddof=1)  # ddof=1 for sample standard deviation (like Excel)

    return stdev, mean

In [None]:
def calculate_z_score(value, mean, stdev):
    """Standardise ``value`` against a universe mean and standard deviation.

    Z-score = (value - mean) / standard_deviation

    Args:
        value: The value to calculate z-score for
        mean: Mean of the universe
        stdev: Standard deviation of the universe

    Returns:
        float: z-score, or None when any input is None or ``stdev`` is zero
    """
    any_missing = any(item is None for item in (value, mean, stdev))
    if any_missing or stdev == 0:
        return None

    deviation = value - mean
    return deviation / stdev

In [None]:
def calculate_weighted_z_score(one_year_z_score, six_month_z_score, weights=None):
    """Combine the 1-year and 6-month z-scores into one weighted score.

    Args:
        one_year_z_score: Z-score for 1-year momentum ratio
        six_month_z_score: Z-score for 6-month momentum ratio
        weights: Dictionary with the keys 'one_year' and 'six_month'.
                Default is {'one_year': 0.5, 'six_month': 0.5}

    Returns:
        float: Weighted z-score, or None when either z-score is None
    """
    both_present = one_year_z_score is not None and six_month_z_score is not None
    if not both_present:
        return None

    # Equal weighting unless the caller supplies its own
    active_weights = {'one_year': 0.5, 'six_month': 0.5} if weights is None else weights

    one_year_part = one_year_z_score * active_weights['one_year']
    six_month_part = six_month_z_score * active_weights['six_month']
    return one_year_part + six_month_part

In [None]:
def calculate_normalized_z_score(z_score):
    """Normalise a z-score with the formula IF(z_score>0,(1+z_score),(1-z_score)^-1).

    Args:
        z_score: The z-score to normalize

    Returns:
        float: ``1 + z_score`` for positive scores, ``(1 - z_score) ** -1``
        otherwise, or None when the input is None
    """
    if z_score is None:
        return None

    if z_score > 0:
        return 1 + z_score

    # Zero and negative scores map into (0, 1]
    return (1 - z_score) ** -1

In [None]:
def calculate_rank(value, values_list):
    """Calculate the rank of a value within a list. Similar to Excel's RANK(number, ref, [order])

    Ranking is descending (order=0): the rank is one more than the number of
    valid entries that are larger than ``value``. Equal values therefore share
    a rank and the ranks after a tie are skipped, e.g. [10, 10, 5] ranks as
    1, 1, 3.

    Args:
        value: The number whose rank you want to find
        values_list: A list of numbers that defines the relative ranking;
            None and NaN entries are ignored

    Returns:
        int: The rank of the number (1 being the highest), or None when
        ``value`` is None/NaN or the list has no valid entries
    """
    # Remove None and NaN values from the list
    valid_values = [v for v in values_list if v is not None and not np.isnan(v)]

    if value is None or np.isnan(value) or not valid_values:
        return None

    # Entries that differ from `value` only by floating-point noise count as
    # ties, not as larger values.
    larger_count = sum(
        1
        for v in valid_values
        if v > value and not np.isclose(v, value, rtol=1e-9, atol=1e-12)
    )

    return larger_count + 1

In [None]:
def generate_rank_analysis_markdown(stock_data, symbol, one_year_rank=None, six_month_rank=None, total_stocks=None):
    """Generate markdown documentation for RANK analysis calculations.

    The text walks through the reference dates, the 1-year and 6-month price changes,
    the recent log returns with their standard deviations, and the momentum ratio
    components, all recomputed from ``stock_data`` via the notebook's helper functions.

    Args:
        stock_data: Price DataFrame with a 'Close' column; the index entries must
            support ``strftime`` (the last row is treated as the current date)
        symbol: Ticker of the stock. Not used in the generated text; kept so the
            signature stays stable for existing callers
        one_year_rank: Optional 1-year momentum rank to report
        six_month_rank: Optional 6-month momentum rank to report
        total_stocks: Size of the ranked universe; a rank line is only written when
            both the rank and this count are given

    Returns:
        str: Markdown text starting with the "## RANK Analysis" heading
    """
    def fmt(value, spec):
        """Format ``value`` with ``spec``; a missing (None) value is shown as 'N/A'."""
        return "N/A" if value is None else format(value, spec)

    md_content = "## RANK Analysis\n\n"

    # Date Calculations Section
    md_content += "### Date Reference Points\n\n"
    current_date = stock_data.index[-1].strftime('%Y-%m-%d')
    one_year_back_date = get_date_one_year_back(stock_data)
    six_month_back_date = get_date_six_months_back(stock_data)

    md_content += f"- **Current Date:** {current_date}\n"
    md_content += f"- **1-Year Reference Date:** {one_year_back_date.strftime('%Y-%m-%d')}\n"
    md_content += f"- **6-Month Reference Date:** {six_month_back_date.strftime('%Y-%m-%d')}\n\n"

    # Price Changes Section
    md_content += "### Price Changes\n\n"
    current_price = stock_data['Close'].iloc[-1]
    # The reference dates are used as index labels, so they must exist in stock_data.
    year_start_price = stock_data.loc[one_year_back_date, 'Close']
    six_month_start_price = stock_data.loc[six_month_back_date, 'Close']

    year_change = (current_price / year_start_price - 1) * 100
    six_month_change = (current_price / six_month_start_price - 1) * 100

    md_content += "**1-Year Price Change:**\n"
    md_content += f"- Start Price: ₹{year_start_price:.2f}\n"
    md_content += f"- Current Price: ₹{current_price:.2f}\n"
    md_content += f"- Change: {year_change:.2f}%\n\n"

    md_content += "**6-Month Price Change:**\n"
    md_content += f"- Start Price: ₹{six_month_start_price:.2f}\n"
    md_content += f"- Current Price: ₹{current_price:.2f}\n"
    md_content += f"- Change: {six_month_change:.2f}%\n\n"

    # Log Returns Section
    md_content += "### Log Returns and Standard Deviation\n\n"
    log_returns, price_data = calculate_log_returns(stock_data)
    one_year_stdev, six_month_stdev = calculate_log_stdev(stock_data)

    md_content += "**Log Returns Calculation:**\n"
    md_content += "- Formula: `ln(current_price / previous_price) * 100`\n"
    md_content += "- Recent Log Returns (last 5 days):\n"
    for date, row in price_data.tail().iterrows():
        md_content += f"  * {date.strftime('%Y-%m-%d')}: {row['Log_Return_Percent']:.3f}%\n"

    md_content += "\n**Standard Deviation:**\n"
    md_content += f"- 1-Year σ: {one_year_stdev*100:.3f}%\n"
    md_content += f"- 6-Month σ: {six_month_stdev*100:.3f}%\n\n"

    # Momentum Ratio Section
    md_content += "### Momentum Ratio\n\n"
    momentum_ratio = calculate_momentum_ratio(stock_data)

    md_content += "**Momentum Ratio Formula:** Price Change % / (Standard Deviation * 100)\n\n"

    period_sections = (
        ('one_year', '1-Year', one_year_rank),
        ('six_month', '6-Month', six_month_rank),
    )
    for period, label, rank in period_sections:
        components = momentum_ratio['components'][period]
        md_content += f"**{label} Momentum Components:**\n"
        md_content += f"- Price Change: {fmt(components['price_change_percent'], '.2f')}%\n"
        md_content += f"- Standard Deviation: {fmt(components['standard_deviation'], '.2f')}%\n"
        md_content += f"- Momentum Ratio: {fmt(momentum_ratio['momentum_ratios'][period], '.4f')}\n"
        if rank is not None and total_stocks is not None:
            md_content += f"- Rank: {rank} out of {total_stocks}\n"
        # Always close the bullet list with a blank line, even without a rank line,
        # so the next bold header does not run into the list.
        md_content += "\n"

    return md_content

In [None]:
def analyze_stocks(stocks_df, output_dir="stock_analysis", max_workers=5, end_date=None):
    """Analyze a list of stocks, rank them by momentum and write one markdown report per stock.

    Side effects: creates ``output_dir`` if needed, rebinds the module-level
    ``rank_data`` global with the universe-wide statistics of this run, and writes
    ``<symbol>.md`` into ``output_dir`` for every stock that was processed successfully.

    Args:
        stocks_df: List of tuples (company_name, symbol)
        output_dir: Directory to store analysis results; its last path component is
            shown in the reports as the universe/category name
        max_workers: Maximum number of parallel workers
        end_date: End date for analysis in YYYY-MM-DD format

    Returns:
        dict: symbol -> result dictionary (prices, technical indicators, strengths and
        weaknesses, the raw price data, plus the rank / z-score information added here).
        Stocks that returned no data or raised during processing are omitted.
    """
    os.makedirs(output_dir, exist_ok=True)
    results = {}

    # Global rank data structure to store universe-wide statistics (reset on every call)
    global rank_data
    rank_data = {
        'one_year': {'values': [], 'ranks': {}, 'universe_stats': {'stdev': None, 'mean': None}},
        'six_month': {'values': [], 'ranks': {}, 'universe_stats': {'stdev': None, 'mean': None}},
        'weighted_z_score': {'values': [], 'ranks': {}},
        'normalized_z_score': {'values': [], 'ranks': {}}
    }

    def process_stock(stock_tuple):
        """Download and analyze one stock; returns its result dict, or None on failure."""
        company_name, symbol = stock_tuple
        logger.info(f"Processing {company_name} ({symbol})")

        try:
            # Get stock data with actual end date
            data, actual_end_date = get_stock_data(symbol, period='2y', end_date=end_date)
            if data.empty:
                logger.warning(f"No data available for {symbol}")
                return None

            # Use actual end date for all calculations
            actual_end_date_str = actual_end_date.strftime('%Y-%m-%d')

            # Calculate technical indicators
            rsi = calculate_rsi(data)
            macd_line, signal_line, histogram = calculate_macd(data)
            momentum_period20 = calculate_momentum(data, period=20)
            momentum_index = calculate_momentum_index(data)
            momentum_ratio = calculate_momentum_ratio(data, actual_end_date_str)

            # Get price data using actual end date
            prices = get_prices_for_dates(data, actual_end_date_str)

            # Determine strengths and weaknesses
            strengths, weaknesses = determine_strength(data, rsi, macd_line, signal_line)

            # Store the result with all necessary data
            result = {
                'company_name': company_name,
                'symbol': symbol,
                'actual_end_date': actual_end_date_str,
                'current_price': prices['Current']['price'],
                'price_data': prices,
                'technical_indicators': {
                    'rsi': rsi.iloc[-1] if not rsi.empty else None,
                    'macd': {
                        'macd_line': macd_line.iloc[-1] if not macd_line.empty else None,
                        'signal_line': signal_line.iloc[-1] if not signal_line.empty else None,
                        'histogram': histogram.iloc[-1] if not histogram.empty else None
                    },
                    'momentum': {
                        'momentum_20day': momentum_period20.iloc[-1] if not momentum_period20.empty else None,
                        'momentum_index': momentum_index.iloc[-1] if not momentum_index.empty else None,
                        'momentum_ratio': momentum_ratio
                    }
                },
                'analysis': {
                    'strengths': strengths,
                    'weaknesses': weaknesses
                },
                'data': data
            }

            # Store momentum values for universe calculations
            # (appended from the worker threads, so the order follows completion order)
            one_year_momentum = momentum_ratio['momentum_ratios']['one_year']
            six_month_momentum = momentum_ratio['momentum_ratios']['six_month']

            if one_year_momentum is not None:
                rank_data['one_year']['values'].append(one_year_momentum)
            if six_month_momentum is not None:
                rank_data['six_month']['values'].append(six_month_momentum)

            return result

        except Exception as e:
            # Best effort: one failing stock must not abort the whole universe.
            logger.error(f"Error processing {symbol}: {e}")
            return None

    # Process stocks in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(process_stock, stock): stock for stock in stocks_df}

        for future in concurrent.futures.as_completed(futures):
            stock = futures[future]
            try:
                result = future.result()
                if result:
                    results[result['symbol']] = result
            except Exception as e:
                logger.error(f"Error processing {stock}: {e}")

    # Give every result its rank / z-score containers up front. A stock without any
    # momentum value would otherwise have no 'z_scores' key and break the lookups below.
    for result in results.values():
        ratio_info = result['technical_indicators']['momentum']['momentum_ratio']
        ratio_info.setdefault('ranks', {})
        ratio_info.setdefault('z_scores', {})

    # Calculate universe statistics for both periods
    for period in ['one_year', 'six_month']:
        values = rank_data[period]['values']
        stdev, mean = calculate_universe_stats(values)
        rank_data[period]['universe_stats']['stdev'] = stdev
        rank_data[period]['universe_stats']['mean'] = mean

        # Calculate ranks and z-scores
        for symbol, result in results.items():
            ratio_info = result['technical_indicators']['momentum']['momentum_ratio']
            momentum = ratio_info['momentum_ratios'][period]
            if momentum is None:
                continue

            rank = calculate_rank(momentum, values)
            z_score = calculate_z_score(momentum, mean, stdev)
            rank_data[period]['ranks'][symbol] = rank

            # Update the result with rank and z-score information
            ratio_info['ranks'][period] = rank
            ratio_info['z_scores'][period] = z_score
            # Replaced on every period pass, so this ends up holding the stats of the
            # last period for which the stock had a momentum value.
            ratio_info['universe_stats'] = {
                'stdev': stdev,
                'mean': mean
            }

    # Calculate weighted z-scores and normalized z-scores
    weighted_z_scores = {}
    normalized_z_scores = {}
    for symbol, result in results.items():
        z_scores = result['technical_indicators']['momentum']['momentum_ratio']['z_scores']
        weighted_z_score = calculate_weighted_z_score(z_scores.get('one_year'), z_scores.get('six_month'))

        if weighted_z_score is not None:
            weighted_z_scores[symbol] = weighted_z_score
            rank_data['weighted_z_score']['values'].append(weighted_z_score)

            # Calculate normalized z-score
            normalized_z_score = calculate_normalized_z_score(weighted_z_score)
            if normalized_z_score is not None:
                normalized_z_scores[symbol] = normalized_z_score
                rank_data['normalized_z_score']['values'].append(normalized_z_score)

    def rank_scores(metric, scores_by_symbol):
        """Rank one derived score (highest first) and store score, rank and universe size."""
        valid_scores = [v for v in rank_data[metric]['values'] if v is not None and not np.isnan(v)]
        rank_data[metric]['values'] = valid_scores
        ordered_scores = sorted(valid_scores, reverse=True)

        for symbol, score in scores_by_symbol.items():
            if score is None or np.isnan(score):
                continue

            # Every non-NaN score was also appended to the values list above, so the
            # lookup always succeeds; tied scores share the position of the first one.
            rank = ordered_scores.index(score) + 1

            rank_data[metric]['ranks'][symbol] = rank
            results[symbol]['technical_indicators']['momentum']['momentum_ratio'][metric] = {
                'score': score,
                'rank': rank,
                'total_stocks': len(ordered_scores)  # Use count of valid values
            }

    # Calculate ranks for weighted and normalized z-scores
    rank_scores('weighted_z_score', weighted_z_scores)
    rank_scores('normalized_z_score', normalized_z_scores)

    # Values shared by every report
    total_stocks = len(results)
    # Extract the category/universe from the last component of output_dir
    # ("nifty_50", "midcap_150", ...); normpath tolerates trailing or OS-specific separators.
    category = os.path.basename(os.path.normpath(output_dir))
    # Format the category for display - remove underscore and capitalize
    display_category = category.replace('_', ' ').title()

    def format_universe_stat(value):
        """Format a universe statistic; a missing (None or NaN) value is spelled out."""
        if value is None or np.isnan(value):
            return "Not available"
        return f"{value:.4f}"

    def momentum_section(ratio_info, period, rank):
        """Bullet list for one period; empty when the stock has no rank or momentum."""
        momentum = ratio_info['momentum_ratios'][period]
        # Explicit None checks: a momentum of exactly 0.0 is a valid value.
        if rank is None or momentum is None:
            return ""

        stats = rank_data[period]['universe_stats']
        section = f"- Rank: {rank} out of {total_stocks}\n"
        section += f"- Momentum Ratio: {momentum:.4f}\n"
        section += f"- Universe Mean: {format_universe_stat(stats['mean'])}\n"
        section += f"- Universe StDev: {format_universe_stat(stats['stdev'])}\n"
        z_score = ratio_info['z_scores'].get(period)
        if z_score is not None:
            section += f"- Z-Score: {z_score:.4f}\n"
        return section

    # Generate markdown files
    for symbol, result in results.items():
        company_name = result['company_name']
        indicators = result['technical_indicators']
        ratio_info = indicators['momentum']['momentum_ratio']
        one_year_rank = rank_data['one_year']['ranks'].get(symbol)
        six_month_rank = rank_data['six_month']['ranks'].get(symbol)

        # Generate rank analysis markdown
        rank_analysis_md = generate_rank_analysis_markdown(
            result['data'],
            symbol,
            one_year_rank=one_year_rank,
            six_month_rank=six_month_rank,
            total_stocks=total_stocks
        )

        # Create comprehensive markdown content
        md_content = f"# {company_name} ({symbol}) Analysis\n\n"

        md_content += f"Company Name (ticker): {symbol}\n\n"

        md_content += f"Universe/Category: {display_category}\n\n"

        # Add actual date used for analysis
        if end_date and result['actual_end_date'] != end_date:
            md_content += f"*Note: Analysis uses last available date {result['actual_end_date']} instead of requested date {end_date}*\n\n"

        # Current Price and Performance
        md_content += "## Current Price and Performance\n\n"
        md_content += f"Current Price: Rs.{result['price_data']['Current']['price']:.2f}\n"
        if '1_year_back' in result['price_data']:
            md_content += f"1-Year Change: {result['price_data']['1_year_back']['price_change_percent']:.2f}%\n"
        if '6_months_back' in result['price_data']:
            md_content += f"6-Month Change: {result['price_data']['6_months_back']['price_change_percent']:.2f}%\n"
        # Blank line before the next heading even when the 6-month figure is missing.
        md_content += "\n"

        # Technical Indicators
        md_content += "## Technical Indicators\n\n"
        if indicators['rsi'] is not None:
            md_content += f"RSI (14-day): {float(indicators['rsi']):.2f}\n"
        if indicators['macd']['macd_line'] is not None:
            md_content += f"MACD Line: {float(indicators['macd']['macd_line']):.2f}\n"
            md_content += f"Signal Line: {float(indicators['macd']['signal_line']):.2f}\n"
            md_content += f"MACD Histogram: {float(indicators['macd']['histogram']):.2f}\n"

        # Momentum Rankings and Universe Statistics
        md_content += "\n## Momentum Rankings and Universe Statistics\n\n"

        # 1-Year Stats
        md_content += "### 1-Year Momentum\n"
        md_content += momentum_section(ratio_info, 'one_year', one_year_rank)

        # 6-Month Stats
        md_content += "\n### 6-Month Momentum\n"
        md_content += momentum_section(ratio_info, 'six_month', six_month_rank)

        # Add dedicated Rank and Z-Score section
        one_year_z_score = ratio_info['z_scores'].get('one_year')
        six_month_z_score = ratio_info['z_scores'].get('six_month')
        weighted_z_score_data = ratio_info.get('weighted_z_score', {})
        normalized_z_score_data = ratio_info.get('normalized_z_score', {})

        md_content += "\n## Rank and Z-Score Analysis\n\n"
        md_content += "### Z-Scores\n"
        if one_year_z_score is not None:
            md_content += f"- 1-Year Z-Score: {one_year_z_score:.4f}\n"
        if six_month_z_score is not None:
            md_content += f"- 6-Month Z-Score: {six_month_z_score:.4f}\n"
        if weighted_z_score_data.get('score') is not None:
            md_content += f"- Weighted Z-Score: {weighted_z_score_data['score']:.4f}\n"
            md_content += "  *(Calculated as: 1-Year Z-Score × 0.5 + 6-Month Z-Score × 0.5)*\n"
        if normalized_z_score_data.get('score') is not None:
            md_content += f"- Normalized Z-Score: {normalized_z_score_data['score']:.4f}\n"
            md_content += "  *(Calculated as: IF(weighted_z_score>0, (1+weighted_z_score), (1-weighted_z_score)^-1))*\n"

        md_content += "\n### Rankings\n"
        if one_year_rank is not None:
            md_content += f"- 1-Year Momentum Rank: {one_year_rank} out of {total_stocks}\n"
        if six_month_rank is not None:
            md_content += f"- 6-Month Momentum Rank: {six_month_rank} out of {total_stocks}\n"
        if weighted_z_score_data.get('rank') is not None:
            md_content += f"- Weighted Z-Score Rank: {weighted_z_score_data['rank']} out of {weighted_z_score_data['total_stocks']}\n"
        if normalized_z_score_data.get('rank') is not None:
            md_content += f"- Normalized Z-Score Rank: {normalized_z_score_data['rank']} out of {normalized_z_score_data['total_stocks']}\n"

        # Add universe statistics, spelling out values that are not available
        one_year_stats = rank_data['one_year']['universe_stats']
        six_month_stats = rank_data['six_month']['universe_stats']

        md_content += "\n### Universe Statistics\n"
        md_content += "**1-Year Momentum:**\n"
        md_content += f"- Mean: {format_universe_stat(one_year_stats['mean'])}\n"
        md_content += f"- Standard Deviation: {format_universe_stat(one_year_stats['stdev'])}\n\n"

        md_content += "**6-Month Momentum:**\n"
        md_content += f"- Mean: {format_universe_stat(six_month_stats['mean'])}\n"
        md_content += f"- Standard Deviation: {format_universe_stat(six_month_stats['stdev'])}\n"

        # Strengths and Weaknesses
        md_content += "\n## Analysis\n\n"
        md_content += "### Strengths\n"
        for strength in result['analysis']['strengths']:
            md_content += f"- {strength}\n"

        md_content += "\n### Weaknesses\n"
        for weakness in result['analysis']['weaknesses']:
            md_content += f"- {weakness}\n"

        # Add rank analysis content
        md_content += "\n" + rank_analysis_md

        # Save markdown file with all analysis
        analysis_file = os.path.join(output_dir, f"{symbol}.md")
        with file_lock:
            with open(analysis_file, 'w', encoding='utf-8') as f:
                f.write(md_content)

    return results

In [None]:
def parse_stock_list(stock_list_text):
    """Parse tab-separated "Company Name<TAB>Symbol" text into (company_name, symbol) tuples.

    Lines without a tab are ignored, as are the header row and rows whose name or
    symbol is empty after stripping whitespace. Columns beyond the second are dropped.

    Args:
        stock_list_text: Multi-line string with one stock per line

    Returns:
        list: (company_name, symbol) tuples in input order
    """
    skipped_names = ('Company Name', '')
    skipped_symbols = ('Symbol', '')

    parsed = []
    for raw_line in stock_list_text.strip().split('\n'):
        fields = raw_line.split('\t')
        # A line without a tab splits into a single field and carries no symbol.
        if len(fields) < 2:
            continue

        name, ticker = fields[0].strip(), fields[1].strip()
        if name in skipped_names or ticker in skipped_symbols:
            continue

        parsed.append((name, ticker))

    return parsed

In [None]:
# Example usage:
stock_list_nifty = """Company Name	Symbol
Adani Enterprises Ltd.	ADANIENT
Adani Ports and Special Economic Zone Ltd.	ADANIPORTS
Apollo Hospitals Enterprise Ltd.	APOLLOHOSP
Asian Paints Ltd.	ASIANPAINT
Axis Bank Ltd.	AXISBANK
Bajaj Auto Ltd.	BAJAJ-AUTO
Bajaj Finance Ltd.	BAJFINANCE
Bajaj Finserv Ltd.	BAJAJFINSV
Bharat Electronics Ltd.	BEL
Bharti Airtel Ltd.	BHARTIARTL
Cipla Ltd.	CIPLA
Coal India Ltd.	COALINDIA
Dr. Reddy's Laboratories Ltd.	DRREDDY
Eicher Motors Ltd.	EICHERMOT
Grasim Industries Ltd.	GRASIM
HCL Technologies Ltd.	HCLTECH
HDFC Bank Ltd.	HDFCBANK
HDFC Life Insurance Company Ltd.	HDFCLIFE
Hero MotoCorp Ltd.	HEROMOTOCO
Hindalco Industries Ltd.	HINDALCO
Hindustan Unilever Ltd.	HINDUNILVR
ICICI Bank Ltd.	ICICIBANK
ITC Ltd.	ITC
IndusInd Bank Ltd.	INDUSINDBK
Infosys Ltd.	INFY
JSW Steel Ltd.	JSWSTEEL
Jio Financial Services Ltd.	JIOFIN
Kotak Mahindra Bank Ltd.	KOTAKBANK
Larsen & Toubro Ltd.	LT
Mahindra & Mahindra Ltd.	M&M
Maruti Suzuki India Ltd.	MARUTI
NTPC Ltd.	NTPC
Nestle India Ltd.	NESTLEIND
Oil & Natural Gas Corporation Ltd.	ONGC
Power Grid Corporation of India Ltd.	POWERGRID
Reliance Industries Ltd.	RELIANCE
SBI Life Insurance Company Ltd.	SBILIFE
Shriram Finance Ltd.	SHRIRAMFIN
State Bank of India	SBIN
Sun Pharmaceutical Industries Ltd.	SUNPHARMA
Tata Consultancy Services Ltd.	TCS
Tata Consumer Products Ltd.	TATACONSUM
Tata Motors Ltd.	TATAMOTORS
Tata Steel Ltd.	TATASTEEL
Tech Mahindra Ltd.	TECHM
Titan Company Ltd.	TITAN
Trent Ltd.	TRENT
UltraTech Cement Ltd.	ULTRACEMCO
Wipro Ltd.	WIPRO
Zomato Ltd.	ZOMATO"""

# Parse the tab-separated Nifty 50 list and run the analysis for that universe.
# The analysis date comes from the notebook-level `end_date` constant instead of a
# repeated literal, so every universe is analysed as of the same date.
stocks_nifty = parse_stock_list(stock_list_nifty)
results_nifty50 = analyze_stocks(stocks_nifty, output_dir="stock_analysis/reports_v2/nifty_50", max_workers=10, end_date=end_date)

In [None]:
stock_list_midcap = """Company Name	Symbol
360 ONE WAM Ltd.	360ONE
3M India Ltd.	3MINDIA
ACC Ltd.	ACC
AIA Engineering Ltd.	AIAENG
APL Apollo Tubes Ltd.	APLAPOLLO
AU Small Finance Bank Ltd.	AUBANK
Abbott India Ltd.	ABBOTINDIA
Adani Total Gas Ltd.	ATGL
Adani Wilmar Ltd.	AWL
Aditya Birla Capital Ltd.	ABCAPITAL
Aditya Birla Fashion and Retail Ltd.	ABFRL
Ajanta Pharmaceuticals Ltd.	AJANTPHARM
Alkem Laboratories Ltd.	ALKEM
Apar Industries Ltd.	APARINDS
Apollo Tyres Ltd.	APOLLOTYRE
Ashok Leyland Ltd.	ASHOKLEY
Astral Ltd.	ASTRAL
Aurobindo Pharma Ltd.	AUROPHARMA
BSE Ltd.	BSE
Balkrishna Industries Ltd.	BALKRISIND
Bandhan Bank Ltd.	BANDHANBNK
Bank of India	BANKINDIA
Bank of Maharashtra	MAHABANK
Berger Paints India Ltd.	BERGEPAINT
Bharat Dynamics Ltd.	BDL
Bharat Forge Ltd.	BHARATFORG
Bharat Heavy Electricals Ltd.	BHEL
Bharti Hexacom Ltd.	BHARTIHEXA
Biocon Ltd.	BIOCON
Blue Star Ltd.	BLUESTARCO
CRISIL Ltd.	CRISIL
Cochin Shipyard Ltd.	COCHINSHIP
Coforge Ltd.	COFORGE
Colgate Palmolive (India) Ltd.	COLPAL
Container Corporation of India Ltd.	CONCOR
Coromandel International Ltd.	COROMANDEL
Cummins India Ltd.	CUMMINSIND
Dalmia Bharat Ltd.	DALBHARAT
Deepak Nitrite Ltd.	DEEPAKNTR
Dixon Technologies (India) Ltd.	DIXON
Emami Ltd.	EMAMILTD
Endurance Technologies Ltd.	ENDURANCE
Escorts Kubota Ltd.	ESCORTS
Exide Industries Ltd.	EXIDEIND
FSN E-Commerce Ventures Ltd.	NYKAA
Federal Bank Ltd.	FEDERALBNK
Fortis Healthcare Ltd.	FORTIS
GE Vernova T&D India Ltd.	GVT&D
GMR Airports Ltd.	GMRAIRPORT
General Insurance Corporation of India	GICRE
Gland Pharma Ltd.	GLAND
Glaxosmithkline Pharmaceuticals Ltd.	GLAXO
Glenmark Pharmaceuticals Ltd.	GLENMARK
Global Health Ltd.	MEDANTA
Godrej Industries Ltd.	GODREJIND
Godrej Properties Ltd.	GODREJPROP
Gujarat Fluorochemicals Ltd.	FLUOROCHEM
Gujarat Gas Ltd.	GUJGASLTD
HDFC Asset Management Company Ltd.	HDFCAMC
Hindustan Petroleum Corporation Ltd.	HINDPETRO
Hindustan Zinc Ltd.	HINDZINC
Hitachi Energy India Ltd.	POWERINDIA
Honeywell Automation India Ltd.	HONAUT
Housing & Urban Development Corporation Ltd.	HUDCO
IDFC First Bank Ltd.	IDFCFIRSTB
IRB Infrastructure Developers Ltd.	IRB
Indian Bank	INDIANB
Indian Railway Catering And Tourism Corporation Ltd.	IRCTC
Indian Renewable Energy Development Agency Ltd.	IREDA
Indraprastha Gas Ltd.	IGL
Indus Towers Ltd.	INDUSTOWER
Ipca Laboratories Ltd.	IPCALAB
J.K. Cement Ltd.	JKCEMENT
JSW Infrastructure Ltd.	JSWINFRA
Jindal Stainless Ltd.	JSL
Jubilant Foodworks Ltd.	JUBLFOOD
K.P.R. Mill Ltd.	KPRMILL
KEI Industries Ltd.	KEI
KPIT Technologies Ltd.	KPITTECH
Kalyan Jewellers India Ltd.	KALYANKJIL
L&T Finance Ltd.	LTF
L&T Technology Services Ltd.	LTTS
LIC Housing Finance Ltd.	LICHSGFIN
Linde India Ltd.	LINDEINDIA
Lloyds Metals And Energy Ltd.	LLOYDSME
Lupin Ltd.	LUPIN
MRF Ltd.	MRF
Mahindra & Mahindra Financial Services Ltd.	M&MFIN
Mangalore Refinery & Petrochemicals Ltd.	MRPL
Mankind Pharma Ltd.	MANKIND
Marico Ltd.	MARICO
Max Financial Services Ltd.	MFSL
Max Healthcare Institute Ltd.	MAXHEALTH
Mazagoan Dock Shipbuilders Ltd.	MAZDOCK
Motherson Sumi Wiring India Ltd.	MSUMI
Motilal Oswal Financial Services Ltd.	MOTILALOFS
MphasiS Ltd.	MPHASIS
Muthoot Finance Ltd.	MUTHOOTFIN
NHPC Ltd.	NHPC
NLC India Ltd.	NLCINDIA
NMDC Ltd.	NMDC
NTPC Green Energy Ltd.	NTPCGREEN
National Aluminium Co. Ltd.	NATIONALUM
Nippon Life India Asset Management Ltd.	NAM-INDIA
Oberoi Realty Ltd.	OBEROIRLTY
Oil India Ltd.	OIL
Ola Electric Mobility Ltd.	OLAELEC
One 97 Communications Ltd.	PAYTM
Oracle Financial Services Software Ltd.	OFSS
PB Fintech Ltd.	POLICYBZR
PI Industries Ltd.	PIIND
Page Industries Ltd.	PAGEIND
Patanjali Foods Ltd.	PATANJALI
Persistent Systems Ltd.	PERSISTENT
Petronet LNG Ltd.	PETRONET
Phoenix Mills Ltd.	PHOENIXLTD
Polycab India Ltd.	POLYCAB
Premier Energies Ltd.	PREMIERENE
Prestige Estates Projects Ltd.	PRESTIGE
Rail Vikas Nigam Ltd.	RVNL
SBI Cards and Payment Services Ltd.	SBICARD
SJVN Ltd.	SJVN
SRF Ltd.	SRF
Schaeffler India Ltd.	SCHAEFFLER
Solar Industries India Ltd.	SOLARINDS
Sona BLW Precision Forgings Ltd.	SONACOMS
Star Health and Allied Insurance Company Ltd.	STARHEALTH
Steel Authority of India Ltd.	SAIL
Sun TV Network Ltd.	SUNTV
Sundaram Finance Ltd.	SUNDARMFIN
Supreme Industries Ltd.	SUPREMEIND
Suzlon Energy Ltd.	SUZLON
Syngene International Ltd.	SYNGENE
Tata Communications Ltd.	TATACOMM
Tata Elxsi Ltd.	TATAELXSI
Tata Investment Corporation Ltd.	TATAINVEST
Tata Technologies Ltd.	TATATECH
The New India Assurance Company Ltd.	NIACL
Thermax Ltd.	THERMAX
Torrent Power Ltd.	TORNTPOWER
Tube Investments of India Ltd.	TIINDIA
UNO Minda Ltd.	UNOMINDA
UPL Ltd.	UPL
Union Bank of India	UNIONBANK
United Breweries Ltd.	UBL
Vishal Mega Mart Ltd.	VMM
Vodafone Idea Ltd.	IDEA
Voltas Ltd.	VOLTAS
Waaree Energies Ltd.	WAAREEENER
Yes Bank Ltd.	YESBANK"""


# Parse the tab-separated Midcap 150 list and run the analysis for that universe.
# The analysis date comes from the notebook-level `end_date` constant instead of a
# repeated literal, so every universe is analysed as of the same date.
stocks_midcap = parse_stock_list(stock_list_midcap)
results_midcap = analyze_stocks(stocks_midcap, output_dir="stock_analysis/reports_v2/midcap_150", max_workers=10, end_date=end_date)

In [None]:
stock_list_smallcap = """Company Name	Symbol
ACME Solar Holdings Ltd.	ACMESOLAR
Aadhar Housing Finance Ltd.	AADHARHFC
Aarti Industries Ltd.	AARTIIND
Aavas Financiers Ltd.	AAVAS
Action Construction Equipment Ltd.	ACE
Aditya Birla Real Estate Ltd.	ABREL
Aditya Birla Sun Life AMC Ltd.	ABSLAMC
Aegis Logistics Ltd.	AEGISLOG
Afcons Infrastructure Ltd.	AFCONS
Affle (India) Ltd.	AFFLE
Akums Drugs and Pharmaceuticals Ltd.	AKUMS
Alembic Pharmaceuticals Ltd.	APLLTD
Alivus Life Sciences Ltd.	ALIVUS
Alkyl Amines Chemicals Ltd.	ALKYLAMINE
Alok Industries Ltd.	ALOKINDS
Amara Raja Energy & Mobility Ltd.	ARE&M
Amber Enterprises India Ltd.	AMBER
Anand Rathi Wealth Ltd.	ANANDRATHI
Anant Raj Ltd.	ANANTRAJ
Angel One Ltd.	ANGELONE
Aptus Value Housing Finance India Ltd.	APTUS
Asahi India Glass Ltd.	ASAHIINDIA
Aster DM Healthcare Ltd.	ASTERDM
AstraZenca Pharma India Ltd.	ASTRAZEN
Atul Ltd.	ATUL
Authum Investment & Infrastructure Ltd.	AIIL
BASF India Ltd.	BASF
BEML Ltd.	BEML
BLS International Services Ltd.	BLS
Balrampur Chini Mills Ltd.	BALRAMCHIN
Bata India Ltd.	BATAINDIA
Bayer Cropscience Ltd.	BAYERCROP
Bikaji Foods International Ltd.	BIKAJI
Birlasoft Ltd.	BSOFT
Blue Dart Express Ltd.	BLUEDART
Bombay Burmah Trading Corporation Ltd.	BBTC
Brainbees Solutions Ltd.	FIRSTCRY
Brigade Enterprises Ltd.	BRIGADE
C.E. Info Systems Ltd.	MAPMYINDIA
CCL Products (I) Ltd.	CCL
CESC Ltd.	CESC
Campus Activewear Ltd.	CAMPUS
Can Fin Homes Ltd.	CANFINHOME
Caplin Point Laboratories Ltd.	CAPLIPOINT
Capri Global Capital Ltd.	CGCL
Carborundum Universal Ltd.	CARBORUNIV
Castrol India Ltd.	CASTROLIND
Ceat Ltd.	CEATLTD
Central Bank of India	CENTRALBK
Central Depository Services (India) Ltd.	CDSL
Century Plyboards (India) Ltd.	CENTURYPLY
Cera Sanitaryware Ltd	CERA
Chalet Hotels Ltd.	CHALET
Chambal Fertilizers & Chemicals Ltd.	CHAMBLFERT
Chennai Petroleum Corporation Ltd.	CHENNPETRO
Cholamandalam Financial Holdings Ltd.	CHOLAHLDNG
City Union Bank Ltd.	CUB
Clean Science and Technology Ltd.	CLEAN
Computer Age Management Services Ltd.	CAMS
Concord Biotech Ltd.	CONCORDBIO
Craftsman Automation Ltd.	CRAFTSMAN
CreditAccess Grameen Ltd.	CREDITACC
Crompton Greaves Consumer Electricals Ltd.	CROMPTON
Cyient Ltd.	CYIENT
DCM Shriram Ltd.	DCMSHRIRAM
DOMS Industries Ltd.	DOMS
Data Patterns (India) Ltd.	DATAPATTNS
Deepak Fertilisers & Petrochemicals Corp. Ltd.	DEEPAKFERT
Delhivery Ltd.	DELHIVERY
Devyani International Ltd.	DEVYANI
Dr. Lal Path Labs Ltd.	LALPATHLAB
E.I.D. Parry (India) Ltd.	EIDPARRY
EIH Ltd.	EIHOTEL
Elecon Engineering Co. Ltd.	ELECON
Elgi Equipments Ltd.	ELGIEQUIP
Emcure Pharmaceuticals Ltd.	EMCURE
Engineers India Ltd.	ENGINERSIN
Eris Lifesciences Ltd.	ERIS
Fertilisers and Chemicals Travancore Ltd.	FACT
Finolex Cables Ltd.	FINCABLES
Finolex Industries Ltd.	FINPIPE
Firstsource Solutions Ltd.	FSL
Five-Star Business Finance Ltd.	FIVESTAR
Garden Reach Shipbuilders & Engineers Ltd.	GRSE
Gillette India Ltd.	GILLETTE
Go Digit General Insurance Ltd.	GODIGIT
Godawari Power & Ispat Ltd.	GPIL
Godfrey Phillips India Ltd.	GODFRYPHLP
Godrej Agrovet Ltd.	GODREJAGRO
Granules India Ltd.	GRANULES
Graphite India Ltd.	GRAPHITE
Gravita India Ltd.	GRAVITA
Great Eastern Shipping Co. Ltd.	GESHIP
Gujarat Mineral Development Corporation Ltd.	GMDCLTD
Gujarat Narmada Valley Fertilizers and Chemicals Ltd.	GNFC
Gujarat Pipavav Port Ltd.	GPPL
Gujarat State Petronet Ltd.	GSPL
H.E.G. Ltd.	HEG
HBL Engineering Ltd.	HBLENGINE
HFCL Ltd.	HFCL
Happiest Minds Technologies Ltd.	HAPPSTMNDS
Himadri Speciality Chemical Ltd.	HSCL
Hindustan Copper Ltd.	HINDCOPPER
Home First Finance Company India Ltd.	HOMEFIRST
Honasa Consumer Ltd.	HONASA
IDBI Bank Ltd.	IDBI
IFCI Ltd.	IFCI
IIFL Finance Ltd.	IIFL
INOX India Ltd.	INOXINDIA
IRCON International Ltd.	IRCON
ITI Ltd.	ITI
Indegene Ltd.	INDGN
India Cements Ltd.	INDIACEM
Indiamart Intermesh Ltd.	INDIAMART
Indian Energy Exchange Ltd.	IEX
Indian Overseas Bank	IOB
Inox Wind Ltd.	INOXWIND
Intellect Design Arena Ltd.	INTELLECT
International Gemmological Institute (India) Ltd.	IGIL
Inventurus Knowledge Solutions Ltd.	IKS
J.B. Chemicals & Pharmaceuticals Ltd.	JBCHEPHARM
JBM Auto Ltd.	JBMA
JK Tyre & Industries Ltd.	JKTYRE
JM Financial Ltd.	JMFINANCIL
JSW Holdings Ltd.	JSWHL
Jaiprakash Power Ventures Ltd.	JPPOWER
Jammu & Kashmir Bank Ltd.	J&KBANK
Jindal Saw Ltd.	JINDALSAW
Jubilant Ingrevia Ltd.	JUBLINGREA
Jubilant Pharmova Ltd.	JUBLPHARMA
Jupiter Wagons Ltd.	JWL
Justdial Ltd.	JUSTDIAL
Jyothy Labs Ltd.	JYOTHYLAB
Jyoti CNC Automation Ltd.	JYOTICNC
KNR Constructions Ltd.	KNRCON
Kajaria Ceramics Ltd.	KAJARIACER
Kalpataru Projects International Ltd.	KPIL
Kansai Nerolac Paints Ltd.	KANSAINER
Karur Vysya Bank Ltd.	KARURVYSYA
Kaynes Technology India Ltd.	KAYNES
Kec International Ltd.	KEC
Kfin Technologies Ltd.	KFINTECH
Kirloskar Brothers Ltd.	KIRLOSBROS
Kirloskar Oil Eng Ltd.	KIRLOSENG
Krishna Institute of Medical Sciences Ltd.	KIMS
LT Foods Ltd.	LTFOODS
Latent View Analytics Ltd.	LATENTVIEW
Laurus Labs Ltd.	LAURUSLABS
Lemon Tree Hotels Ltd.	LEMONTREE
MMTC Ltd.	MMTC
Mahanagar Gas Ltd.	MGL
Maharashtra Seamless Ltd.	MAHSEAMLES
Manappuram Finance Ltd.	MANAPPURAM
Mastek Ltd.	MASTEK
Metropolis Healthcare Ltd.	METROPOLIS
Minda Corporation Ltd.	MINDACORP
Multi Commodity Exchange of India Ltd.	MCX
NATCO Pharma Ltd.	NATCOPHARM
NBCC (India) Ltd.	NBCC
NCC Ltd.	NCC
NMDC Steel Ltd.	NSLNISP
Narayana Hrudayalaya Ltd.	NH
Nava Ltd.	NAVA
Navin Fluorine International Ltd.	NAVINFLUOR
Netweb Technologies India Ltd.	NETWEB
Network18 Media & Investments Ltd.	NETWORK18
Neuland Laboratories Ltd.	NEULANDLAB
Newgen Software Technologies Ltd.	NEWGEN
Niva Bupa Health Insurance Company Ltd.	NIVABUPA
Nuvama Wealth Management Ltd.	NUVAMA
Olectra Greentech Ltd.	OLECTRA
PCBL Chemical Ltd.	PCBL
PG Electroplast Ltd.	PGEL
PNB Housing Finance Ltd.	PNBHOUSING
PNC Infratech Ltd.	PNCINFRA
PTC Industries Ltd.	PTCIL
PVR INOX Ltd.	PVRINOX
Pfizer Ltd.	PFIZER
Piramal Enterprises Ltd.	PEL
Piramal Pharma Ltd.	PPLPHARMA
Poly Medicure Ltd.	POLYMED
Poonawalla Fincorp Ltd.	POONAWALLA
Praj Industries Ltd.	PRAJIND
Quess Corp Ltd.	QUESS
R R Kabel Ltd.	RRKABEL
RBL Bank Ltd.	RBLBANK
RHI MAGNESITA INDIA LTD.	RHIM
RITES Ltd.	RITES
Radico Khaitan Ltd	RADICO
Railtel Corporation Of India Ltd.	RAILTEL
Rainbow Childrens Medicare Ltd.	RAINBOW
Ramkrishna Forgings Ltd.	RKFORGE
Rashtriya Chemicals & Fertilizers Ltd.	RCF
RattanIndia Enterprises Ltd.	RTNINDIA
Raymond Lifestyle Ltd.	RAYMONDLSL
Raymond Ltd.	RAYMOND
Redington Ltd.	REDINGTON
Reliance Power Ltd.	RPOWER
Route Mobile Ltd.	ROUTE
SBFC Finance Ltd.	SBFC
SKF India Ltd.	SKFINDIA
Sagility India Ltd.	SAGILITY
Sai Life Sciences Ltd.	SAILIFE
Sammaan Capital Ltd.	SAMMAANCAP
Sapphire Foods India Ltd.	SAPPHIRE
Sarda Energy and Minerals Ltd.	SARDAEN
Saregama India Ltd	SAREGAMA
Schneider Electric Infrastructure Ltd.	SCHNEIDER
Shipping Corporation of India Ltd.	SCI
Shree Renuka Sugars Ltd.	RENUKA
Shyam Metalics and Energy Ltd.	SHYAMMETL
Signatureglobal (India) Ltd.	SIGNATURE
Sobha Ltd.	SOBHA
Sonata Software Ltd.	SONATSOFTW
Sterling and Wilson Renewable Energy Ltd.	SWSOLAR
Sumitomo Chemical India Ltd.	SUMICHEM
Suven Pharmaceuticals Ltd.	SUVENPHAR
Swan Energy Ltd.	SWANENERGY
Syrma SGS Technology Ltd.	SYRMA
TBO Tek Ltd.	TBOTEK
Tanla Platforms Ltd.	TANLA
Tata Chemicals Ltd.	TATACHEM
Tata Teleservices (Maharashtra) Ltd.	TTML
Techno Electric & Engineering Company Ltd.	TECHNOE
Tejas Networks Ltd.	TEJASNET
The Ramco Cements Ltd.	RAMCOCEM
Timken India Ltd.	TIMKEN
Titagarh Rail Systems Ltd.	TITAGARH
Transformers And Rectifiers (India) Ltd.	TARIL
Trident Ltd.	TRIDENT
Triveni Engineering & Industries Ltd.	TRIVENI
Triveni Turbine Ltd.	TRITURBINE
UCO Bank	UCOBANK
UTI Asset Management Company Ltd.	UTIAMC
Usha Martin Ltd.	USHAMART
V-Guard Industries Ltd.	VGUARD
Valor Estate Ltd.	DBREALTY
Vardhman Textiles Ltd.	VTL
Vedant Fashions Ltd.	MANYAVAR
Vijaya Diagnostic Centre Ltd.	VIJAYA
Welspun Corp Ltd.	WELCORP
Welspun Living Ltd.	WELSPUNLIV
Westlife Foodworld Ltd.	WESTLIFE
Whirlpool of India Ltd.	WHIRLPOOL
Wockhardt Ltd.	WOCKPHARMA
ZF Commercial Vehicle Control Systems India Ltd.	ZFCVINDIA
Zee Entertainment Enterprises Ltd.	ZEEL
Zen Technologies Ltd.	ZENTEC
Zensar Technolgies Ltd.	ZENSARTECH
eClerx Services Ltd.	ECLERX"""


# Parse the small-cap listing defined above and run the analysis over it,
# pointing output_dir at the smallcap_250 report folder.
stocks_smallcap = parse_stock_list(stock_list_smallcap)
results_smallcap = analyze_stocks(
    stocks_smallcap,
    output_dir="stock_analysis/reports_v2/smallcap_250",
    max_workers=10,
    end_date="2025-04-12",
)

In [None]:
stock_list_microcap = """Company Name	Symbol
AGI Greenpac Ltd.	AGI
ASK Automotive Ltd.	ASKAUTOLTD
Aarti Drugs Ltd.	AARTIDRUGS
Aarti Pharmalabs Ltd.	AARTIPHARM
Aditya Vision Ltd.	AVL
Advanced Enzyme Tech Ltd.	ADVENZYMES
Aether Industries Ltd.	AETHER
Ahluwalia Contracts (India) Ltd.	AHLUCONT
Akzo Nobel India Ltd.	AKZOINDIA
Allcargo Logistics Ltd.	ALLCARGO
Allied Blenders and Distillers Ltd.	ABDL
Ami Organics Ltd.	AMIORG
Apeejay Surrendra Park Hotels Ltd.	PARKHOTELS
Archean Chemical Industries Ltd.	ACI
Arvind Fashions Ltd.	ARVINDFASN
Arvind Ltd.	ARVIND
Ashoka Buildcon Ltd.	ASHOKA
Astra Microwave Products Ltd.	ASTRAMICRO
Aurionpro Solution Ltd.	AURIONPRO
Avalon Technologies Ltd.	AVALON
Avanti Feeds Ltd.	AVANTIFEED
Awfis Space Solutions Ltd.	AWFIS
Azad Engineering Ltd.	AZAD
Bajaj Hindusthan Sugar Ltd.	BAJAJHIND
Balaji Amines Ltd.	BALAMINES
Balu Forge Industries Ltd.	BALUFORGE
Banco Products (India) Ltd.	BANCOINDIA
Bansal Wire Industries Ltd.	BANSALWIRE
Bhansali Engineering Polymers Ltd.	BEPL
Bharat Bijlee Ltd.	BBL
Birla Corporation Ltd.	BIRLACORPN
Blue Jet Healthcare Ltd.	BLUEJET
Bombay Dyeing & Manufacturing Co. Ltd.	BOMDYEING
Borosil Ltd.	BOROLTD
Borosil Renewables Ltd.	BORORENEW
CIE Automotive India Ltd.	CIEINDIA
CMS Info Systems Ltd.	CMSINFO
CSB Bank Ltd.	CSBBANK
Cartrade Tech Ltd.	CARTRADE
Ceigall India Ltd.	CEIGALL
Cello World Ltd.	CELLO
Chemplast Sanmar Ltd.	CHEMPLASTS
Choice International Ltd.	CHOICEIN
Cigniti Technologies Ltd.	CIGNITITEC
Cyient DLM Ltd.	CYIENTDLM
DCB Bank Ltd.	DCBBANK
DCX Systems Ltd.	DCXINDIA
Datamatics Global Services Ltd.	DATAMATICS
Dhani Services Ltd.	DHANI
Dilip Buildcon Ltd.	DBL
Dishman Carbogen Amcis Ltd.	DCAL
Dodla Dairy Ltd.	DODLA
Dynamatic Technologies Ltd.	DYNAMATECH
EPL Ltd.	EPL
Easy Trip Planners Ltd.	EASEMYTRIP
Edelweiss Financial Services Ltd.	EDELWEISS
Electronics Mart India Ltd.	EMIL
Electrosteel Castings Ltd.	ELECTCAST
Embassy Developments Ltd.	EMBDL
Entero Healthcare Solutions Ltd.	ENTERO
Enviro Infra Engineers Ltd.	EIEL
Epigral Ltd.	EPIGRAL
Equitas Small Finance Bank Ltd.	EQUITASBNK
Ethos Ltd.	ETHOSLTD
Eureka Forbes Ltd.	EUREKAFORB
FDC Ltd.	FDC
Fiem Industries Ltd	FIEMIND
Fine Organic Industries Ltd.	FINEORG
Fineotex Chemical Ltd.	FCL
Force Motors Ltd.	FORCEMOT
G R Infraprojects Ltd.	GRINFRA
GHCL Ltd.	GHCL
GMM Pfaudler Ltd.	GMMPFAUDLR
GMR Power and Urban Infra Ltd.	GMRP&UI
Gabriel India Ltd.	GABRIEL
Ganesh Housing Corporation Ltd.	GANESHHOUC
Ganesha Ecosphere Ltd.	GANECOS
Garware Hi-Tech Films Ltd.	GRWRHITECH
Garware Technical Fibres Ltd.	GARFIBRES
Gateway Distriparks Ltd.	GATEWAY
Gokaldas Exports Ltd.	GOKEX
Gopal Snacks Ltd.	GOPAL
Greaves Cotton Ltd.	GREAVESCOT
Greenpanel Industries Ltd.	GREENPANEL
Greenply Industries Ltd.	GREENPLY
Gujarat Ambuja Exports Ltd.	GAEL
Gujarat State Fertilizers & Chemicals Ltd.	GSFC
Gulf Oil Lubricants India Ltd.	GULFOILLUB
H.G. Infra Engineering Ltd.	HGINFRA
Hathway Cable & Datacom Ltd.	HATHWAY
Healthcare Global Enterprises Ltd.	HCG
HeidelbergCement India Ltd.	HEIDELBERG
Hemisphere Properties India Ltd.	HEMIPROP
Heritage Foods Ltd.	HERITGFOOD
Hikal Ltd.	HIKAL
Hindustan Construction Co. Ltd.	HCC
IFB Industries Ltd.	IFBIND
IIFL Capital Services Ltd.	IIFLCAPS
ITD Cementation India Ltd.	ITDCEM
Imagicaaworld Entertainment Ltd.	IMAGICAA
India Glycols Ltd.	INDIAGLYCO
India Shelter Finance Corporation Ltd.	INDIASHLTR
Indian Metals & Ferro Alloys Ltd.	IMFA
Indigo Paints Ltd.	INDIGOPNTS
Indo Count Industries Ltd.	ICIL
Infibeam Avenues Ltd.	INFIBEAM
Ingersoll Rand (India) Ltd.	INGERRAND
Innova Captab Ltd.	INNOVACAP
Inox Green Energy Services Ltd.	INOXGREEN
Ion Exchange (India) Ltd.	IONEXCHANG
Isgec Heavy Engineering Ltd.	ISGEC
J.Kumar Infraprojects Ltd.	JKIL
JK Lakshmi Cement Ltd.	JKLAKSHMI
JK Paper Ltd.	JKPAPER
JTL Industries Ltd.	JTLIND
Jai Balaji Industries Ltd.	JAIBALAJI
Jai Corp Ltd.	JAICORPLTD
Jain Irrigation Systems Ltd.	JISLJALEQS
Jamna Auto Industries Ltd.	JAMNAAUTO
Jana Small Finance Bank Ltd.	JSFB
Jindal Worldwide Ltd.	JINDWORLD
Johnson Controls - Hitachi Air Conditioning India Ltd.	JCHAC
KPI Green Energy Ltd.	KPIGREEN
KRBL Ltd.	KRBL
KSB Ltd.	KSB
Kalyani Steels Ltd.	KSL
Karnataka Bank Ltd.	KTKBANK
Kaveri Seed Company Ltd.	KSCL
Kirloskar Pneumatic Company Ltd.	KIRLPNU
LMW Ltd.	LMW
Laxmi Organic Industries Ltd.	LXCHEM
Le Travenues Technology Ltd.	IXIGO
Lloyds Engineering Works Ltd.	LLOYDSENGG
Lloyds Enterprises Ltd.	LLOYDSENT
Lux Industries Ltd.	LUXIND
MOIL Ltd.	MOIL
MSTC Ltd.	MSTCLTD
MTAR Technologies Ltd.	MTARTECH
Maharashtra Scooters Ltd.	MAHSCOOTER
Mahindra Lifespace Developers Ltd.	MAHLIFE
Man Infraconstruction Ltd.	MANINFRA
Marksans Pharma Ltd.	MARKSANS
Max Estates Ltd.	MAXESTATES
Medplus Health Services Ltd.	MEDPLUS
Mishra Dhatu Nigam Ltd.	MIDHANI
Mrs. Bectors Food Specialities Ltd.	BECTORFOOD
NEOGEN CHEMICALS LTD.	NEOGEN
NESCO Ltd.	NESCO
NOCIL Ltd.	NOCIL
National Fertilizers Ltd.	NFL
Nazara Technologies Ltd.	NAZARA
Nuvoco Vistas Corporation Ltd.	NUVOCO
Optiemus Infracom Ltd.	OPTIEMUS
Orchid Pharma Ltd.	ORCHPHARMA
Orient Cement Ltd.	ORIENTCEM
Orissa Min Dev Co Ltd.	ORISSAMINE
P N Gadgil Jewellers Ltd.	PNGJL
PC Jeweller Ltd.	PCJEWELLER
PTC India Ltd.	PTC
Paisalo Digital Ltd.	PAISALO
Paradeep Phosphates Ltd.	PARADEEP
Paras Defence and Space Technologies Ltd.	PARAS
Patel Engineering Ltd.	PATELENG
Pearl Global Industries Ltd.	PGIL
Polyplex Corporation Ltd.	POLYPLEX
Power Mech Projects Ltd.	POWERMECH
Pricol Ltd.	PRICOLLTD
Prince Pipes and Fittings Ltd.	PRINCEPIPE
Prism Johnson Ltd.	PRSMJOHNSN
Prudent Corporate Advisory Services Ltd.	PRUDENT
Rain Industries Ltd	RAIN
Rajesh Exports Ltd.	RAJESHEXPO
Rallis India Ltd.	RALLIS
Rategain Travel Technologies Ltd.	RATEGAIN
RattanIndia Power Ltd.	RTNPOWER
Redtape Ltd.	REDTAPE
Refex Industries Ltd.	REFEX
Reliance Infrastructure Ltd.	RELINFRA
Religare Enterprises Ltd.	RELIGARE
Responsive Industries Ltd.	RESPONIND
Restaurant Brands Asia Ltd.	RBA
Rossari Biotech Ltd.	ROSSARI
Safari Industries (India) Ltd.	SAFARI
Samhi Hotels Ltd.	SAMHI
Sanofi Consumer Healthcare India Ltd.	SANOFICONR
Sanofi India Ltd.	SANOFI
Sansera Engineering Ltd.	SANSERA
Senco Gold Ltd.	SENCO
Sequent Scientific Ltd.	SEQUENT
Shaily Engineering Plastics Ltd.	SHAILY
Shakti Pumps (India) Ltd.	SHAKTIPUMP
Sharda Cropchem Ltd.	SHARDACROP
Share India Securities Ltd.	SHAREINDIA
Sheela Foam Ltd.	SFL
Shilpa Medicare Ltd.	SHILPAMED
Shivalik Bimetal Controls Ltd.	SBCL
Shoppers Stop Ltd.	SHOPERSTOP
Shriram Pistons & Rings Ltd.	SHRIPISTON
Skipper Ltd.	SKIPPER
South Indian Bank Ltd.	SOUTHBANK
Spandana Sphoorty Financial Ltd.	SPANDANA
Star Cement Ltd.	STARCEMENT
Sterlite Technologies Ltd.	STLTECH
Strides Pharma Science Ltd.	STAR
Stylam Industries Ltd.	STYLAMIND
Subros Ltd.	SUBROS
Sudarshan Chemical Industries Ltd.	SUDARSCHEM
Sula Vineyards Ltd.	SULA
Sun Pharma Advanced Research Company Ltd.	SPARC
Sunflag Iron & Steel Company Ltd.	SUNFLAG
Sunteck Realty Ltd.	SUNTECK
Suprajit Engineering Ltd.	SUPRAJIT
Supriya Lifescience Ltd.	SUPRIYA
Surya Roshni Ltd.	SURYAROSNI
Symphony Ltd.	SYMPHONY
TARC Ltd.	TARC
TD Power Systems Ltd.	TDPOWERSYS
TVS Supply Chain Solutions Ltd.	TVSSCS
Teamlease Services Ltd.	TEAMLEASE
Technocraft Industries (India) Ltd.	TIIL
Tega Industries Ltd.	TEGA
Texmaco Rail & Eng. Ltd.	TEXRAIL
Thangamayil Jewellery Ltd.	THANGAMAYL
The Anup Engineering Ltd.	ANUP
Thirumalai Chemicals Ltd.	TIRUMALCHM
Thomas Cook (India) Ltd.	THOMASCOOK
Tilaknagar Industries Ltd.	TI
Time Technoplast Ltd.	TIMETECHNO
Tips Music Ltd.	TIPSMUSIC
Transrail Lighting Ltd.	TRANSRAILL
Ujjivan Small Finance Bank Ltd.	UJJIVANSFB
Unimech Aerospace and Manufacturing Ltd.	UNIMECH
V-Mart Retail Ltd.	VMART
V.I.P. Industries Ltd.	VIPIND
VST Industries Ltd.	VSTIND
Va Tech Wabag Ltd.	WABAG
Vaibhav Global Ltd.	VAIBHAVGBL
Varroc Engineering Ltd.	VARROC
Ventive Hospitality Ltd.	VENTIVE
Venus Pipes & Tubes Ltd.	VENUSPIPES
Vesuvius India Ltd.	VESUVIUS
Voltamp Transformers Ltd	VOLTAMP
Websol Energy System Ltd.	WEBELSOLAR
Welspun Enterprises Ltd.	WELENT
Wonderla Holidays Ltd.	WONDERLA
Yatharth Hospital & Trauma Care Services Ltd.	YATHARTH
Zaggle Prepaid Ocean Services Ltd.	ZAGGLE
Zinka Logistics Solutions Ltd.	BLACKBUCK
Zydus Wellness Ltd.	ZYDUSWELL
eMudhra Ltd.	EMUDHRA"""


# Parse the micro-cap listing defined above and run the analysis over it,
# pointing output_dir at the microcap_250 report folder.
stocks_microcap = parse_stock_list(stock_list_microcap)
# Store the outcome under its own name: the original assigned it to
# `results_smallcap`, silently overwriting the small-cap results.
results_microcap = analyze_stocks(
    stocks_microcap,
    output_dir="stock_analysis/reports_v2/microcap_250",
    max_workers=10,
    end_date="2025-04-12",
)

In [None]:
# ANSI escape sequences: switch to bright green, then restore the default colour
GREEN = '\033[92m'
RESET = '\033[0m'

# One (label, parsed stock list) pair per universe, in display order
processed_universes = [
    ("Nifty", stocks_nifty),
    ("Midcap", stocks_midcap),
    ("Smallcap", stocks_smallcap),
    ("Microcap", stocks_microcap),
]

# Per-universe counts, one per line, followed by the grand total
summary_lines = " \n".join(
    f"{label} Stocks Processed {len(stocks)}" for label, stocks in processed_universes
)
total_processed = sum(len(stocks) for _, stocks in processed_universes)

print(f"{GREEN}{summary_lines}{RESET}")
print(f"{GREEN}Total Stocks Processed {total_processed}{RESET}")

In [None]:
# Price is a factor for our company, so we're going to use a low-cost model.

MODEL = "gemini-2.0-flash"  # model name passed to ChatGoogleGenerativeAI below
db_name = "vector_db"  # directory in which the Chroma vector store is persisted
path = "stock_analysis/reports_v2/*"  # glob pattern; each match is loaded as one category folder of reports

In [None]:
# Load variables from a local .env file (if any) into the process environment
load_dotenv()

# Fallback value used when GOOGLE_API_KEY is not defined anywhere; it is only a
# placeholder, not a usable key.
_PLACEHOLDER_API_KEY = 'your-key-if-not-using-env'
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', _PLACEHOLDER_API_KEY)
if GOOGLE_API_KEY == _PLACEHOLDER_API_KEY:
    # Report the misconfiguration here rather than letting it surface later as an
    # authentication error on the first model call.
    logger.warning(
        "GOOGLE_API_KEY is not set; provide it via the environment or a .env file "
        "before calling the Gemini API"
    )
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
# Read in documents using LangChain's loaders
# Take everything in all the sub-folders of our knowledgebase

# Keep only directories: DirectoryLoader expects a directory, so a stray file that
# matches the glob would abort the load. Sorting makes the load order (and therefore
# the chunk order) reproducible from run to run.
folders = sorted(folder for folder in glob.glob(path) if os.path.isdir(folder))
print("folders ", folders)
# With thanks to CG and Jon R, students on the course, for this fix needed for some users
text_loader_kwargs = {'encoding': 'utf-8'}
# If that doesn't work, some Windows users might need to uncomment the next line instead
# text_loader_kwargs={'autodetect_encoding': True}

documents = []
for folder in folders:
    # Extract category name from folder path (nifty_50, midcap_150, etc.)
    category = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()
    for doc in folder_docs:
        # Extract stock symbol from filename (e.g., HDFC.md -> HDFC)
        filename = os.path.basename(doc.metadata["source"])
        symbol = os.path.splitext(filename)[0]

        # Add structured metadata
        doc.metadata["category"] = category  # Explicit category field
        doc.metadata["stock_category"] = category  # Alternative name for filtering
        doc.metadata["symbol"] = symbol  # Add symbol for stock-specific searches
        doc.metadata["doc_type"] = category  # Keep original for compatibility
        documents.append(doc)

    print(f"Loaded {len(folder_docs)} documents from {category}")

In [None]:
# Add preprocessing to ensure key information like normalized z-score rank is prominently included

# (label as it appears in the report text, metadata key prefix) for every rank line
# that is surfaced in the summary and copied into the metadata.
RANK_FIELDS = (
    ("Normalized Z-Score", "normalized_z"),
    ("Weighted Z-Score", "weighted_z"),
    ("1-Year Momentum", "one_year"),
    ("6-Month Momentum", "six_month"),
)

# The summary is prepended this many times to heavily emphasize it in the embedding
SUMMARY_REPEATS = 3


def _first_group(pattern, text):
    """Return group 1 of the first match of `pattern` in `text`, or None if there is no match."""
    match = re.search(pattern, text)
    return match.group(1) if match else None


def _enhance_document(doc):
    """Return a new Document with a rank summary prepended and rank metadata added.

    The input document is left untouched, so re-running the cell always starts from
    the pristine report text instead of stacking another copy of the summary on top
    of an already enhanced document.

    Args:
        doc: a loaded report Document (`page_content` plus `metadata`).

    Returns:
        Document: summary (repeated SUMMARY_REPEATS times) followed by the original
        text; metadata is a copy of the original plus `<prefix>_rank` /
        `<prefix>_total` integers for every rank line found.
    """
    content = doc.page_content

    # Header is expected to look like "# Company Name (SYMBOL) Analysis".
    # The company group is greedy and the symbol group excludes parentheses so that
    # the LAST parenthesised token on the line is taken as the symbol; otherwise a
    # name such as "Data Patterns (India) Ltd. (DATAPATTNS)" yields symbol "India".
    header = re.search(r'# (.+) \(([^()\n]+)\)', content)
    company_name = header.group(1) if header else "Unknown Company"
    symbol = header.group(2) if header else "UNKNOWN"

    # Universe plus current price and performance metrics
    universe = _first_group(r'Universe/Category: ([^\n]+)', content)
    current_price = _first_group(r'Current Price: Rs\.([0-9.]+)', content)
    one_year_change = _first_group(r'1-Year Change: ([0-9.-]+)%', content)
    six_month_change = _first_group(r'6-Month Change: ([0-9.-]+)%', content)

    summary_lines = [
        "STOCK RANKING INFORMATION - HIGH PRIORITY:",
        f"Stock: {company_name} ({symbol})",
    ]
    if universe:
        summary_lines.append(f"Universe/Category: {universe}")
    if current_price:
        summary_lines.append(f"Current Price: Rs.{current_price}")
    if one_year_change:
        summary_lines.append(f"1-Year Change: {one_year_change}%")
    if six_month_change:
        summary_lines.append(f"6-Month Change: {six_month_change}%")

    # Rank lines get special formatting in the summary and are mirrored into the
    # metadata as integers so they can be used for filtering.
    metadata = dict(doc.metadata)
    for label, key in RANK_FIELDS:
        rank_match = re.search(re.escape(label) + r' Rank: (\d+) out of (\d+)', content)
        if rank_match:
            rank, total = rank_match.groups()
            summary_lines.append(f"==={label.upper()} RANK: {rank} OUT OF {total}===")
            metadata[f"{key}_rank"] = int(rank)
            metadata[f"{key}_total"] = int(total)

    rank_summary = "\n".join(summary_lines) + "\n"
    enhanced_content = "\n".join([rank_summary] * SUMMARY_REPEATS + [content])
    return Document(page_content=enhanced_content, metadata=metadata)


documents_with_enhanced_context = [_enhance_document(doc) for doc in documents]

# Use the enhanced documents for chunking - adjust the chunking strategy to preserve entire rank sections
text_splitter = CharacterTextSplitter(
    chunk_size=1800,  # Increase chunk size to ensure rank sections stay together
    chunk_overlap=400,  # Increase overlap for better context preservation
    separator="\n\n",  # Split on paragraph boundaries
    add_start_index=True
)

chunks = text_splitter.split_documents(documents_with_enhanced_context)


# Print some examples of the metadata to verify
for i in range(min(3, len(chunks))):
    print(f"Chunk {i} metadata: {chunks[i].metadata}")

In [None]:
# Number of chunks produced by the text splitter (shown as the cell output)
len(chunks)

In [None]:
# Distinct doc_type values present across all chunks
doc_types = {chunk.metadata['doc_type'] for chunk in chunks}
# Sort before printing: a set has no defined iteration order, so the unsorted
# listing could differ between runs.
print(f"Document types found: {', '.join(sorted(doc_types))}")

In [None]:
# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Delete if already exists, so the store built below only holds the current chunks
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

# Create vectorstore (the original line repeated the assignment target: `vectorstore = vectorstore = ...`)
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
    collection_metadata={"hnsw:space": "cosine"}  # Explicitly specify distance metric
)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

# Fetch a single stored embedding to report the dimensionality of the vectors
collection = vectorstore._collection
sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")

In [None]:
# Prework for chroma vectors

# Marker colour per stock universe. Any other doc_type (e.g. an extra folder under
# the reports directory) falls back to grey instead of raising ValueError, which the
# original list.index() lookup did.
DOC_TYPE_COLORS = {
    'nifty_50': 'blue',
    'midcap_150': 'green',
    'smallcap_250': 'red',
    'microcap_250': 'orange',
}

# NOTE: this rebinds `documents` (previously the loaded Document objects) to the raw
# chunk texts and `doc_types` to a per-chunk list; the visualization functions below
# read these module-level names.
result = collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
documents = result['documents']
doc_types = [metadata['doc_type'] for metadata in result['metadatas']]
colors = [DOC_TYPE_COLORS.get(doc_type, 'gray') for doc_type in doc_types]

In [None]:
def create_2d_visualization():
    """Reduce the stored embeddings to two dimensions with t-SNE and plot them.

    Reads the module-level `vectors`, `colors`, `doc_types` and `documents`.

    Returns:
        A Plotly figure holding one scatter trace, one marker per stored chunk,
        coloured by `colors` and with the doc type plus the first 100 characters
        of the chunk as hover text.
    """
    # We humans find it easier to visualize things in 2D!
    perplexity = min(25, len(vectors) - 1)
    projection = TSNE(n_components=2, random_state=42, perplexity=perplexity).fit_transform(vectors)

    hover_labels = [
        f"Type: {doc_type}<br>Text: {text[:100]}..."
        for doc_type, text in zip(doc_types, documents)
    ]

    # 2D scatter trace; layout below is tuned for side-by-side display
    scatter = go.Scatter(
        x=projection[:, 0],
        y=projection[:, 1],
        mode='markers',
        marker=dict(size=5, color=colors, opacity=0.8),
        text=hover_labels,
        hoverinfo='text',
    )
    fig = go.Figure(data=[scatter])

    layout_options = dict(
        margin=dict(r=10, b=10, l=10, t=10),  # Minimize margins
        template="plotly_white",
        autosize=True,
        hovermode="closest",
        height=None,  # Let height be determined by container
        width=None,  # Let width be determined by container
    )
    fig.update_layout(**layout_options)

    return fig

def create_3d_visualization():
    """Reduce the stored embeddings to three dimensions with t-SNE and plot them.

    Reads the module-level `vectors`, `colors`, `doc_types` and `documents`.

    Returns:
        A Plotly figure holding one 3D scatter trace, one marker per stored chunk,
        coloured by `colors` and with the doc type plus the first 100 characters
        of the chunk as hover text.
    """
    perplexity = min(25, len(vectors) - 1)
    projection = TSNE(n_components=3, random_state=42, perplexity=perplexity).fit_transform(vectors)

    hover_labels = [
        f"Type: {doc_type}<br>Text: {text[:100]}..."
        for doc_type, text in zip(doc_types, documents)
    ]

    # 3D scatter trace; layout below is tuned for side-by-side display
    scatter = go.Scatter3d(
        x=projection[:, 0],
        y=projection[:, 1],
        z=projection[:, 2],
        mode='markers',
        marker=dict(size=4, color=colors, opacity=0.7),  # Smaller markers for better performance
        text=hover_labels,
        hoverinfo='text',
    )
    fig = go.Figure(data=[scatter])

    scene_options = dict(
        xaxis_title='x',
        yaxis_title='y',
        zaxis_title='z',
        aspectmode='data',  # Better fitting for the container
    )
    fig.update_layout(
        scene=scene_options,
        margin=dict(r=0, b=0, l=0, t=0),  # Minimize margins
        template="plotly_white",
        autosize=True,
        height=None,  # Let height be determined by container
        width=None,  # Let width be determined by container
    )

    return fig

In [None]:
# Count the distinct report files behind the chunks instead of hard-coding the number
# of stocks: each report file is one stock, and every chunk carries the `source`
# path of the report it was split from.
unique_stock_count = len({chunk.metadata["source"] for chunk in chunks})
# Guard the division so an empty knowledge base reports 0.00 instead of raising
average_chunks_per_stock = len(chunks) / unique_stock_count if unique_stock_count else 0.0

print(f"Total chunks in vectorstore: {len(chunks)}")
print(f"Total unique stocks: {unique_stock_count}")
print(f"Average chunks per stock: {average_chunks_per_stock:.2f}")

# Calculate recommended k value
recommended_k = len(chunks)  # Start with maximum possible chunks
print(f"Recommended k value: {recommended_k}")

In [None]:
# Prompt text for the question-answering step. It declares two placeholders,
# {context} and {question}, which are the input variables of the PromptTemplate
# built from this string. The wording is runtime text sent to the model.
TEMPLATE = """
You are a financial advisor specializing in the Indian stock market analysis.
Use the following pieces of context to answer the question at the end.
Always respond in English only.
Focus on providing factual information from the given context.

If the question is about a specific category of stocks (like "Nifty 50", "midcap", "smallcap", or "microcap"):
- Group your answer by the requested category
- Present information as a well-organized list
- Include key metrics like price, momentum scores, and rankings when available
- When asked for rankings or "top stocks", ALWAYS sort by Normalized Z-Score Rank in ascending order (Rank 1 is best) unless another specific rank type is mentioned
- When asked for "top 10" or similar, return EXACTLY the stocks with ranks 1-10, not any random 10 stocks
- Be very precise about the category - never mix stocks from different categories (e.g., don't include microcap stocks when asked about smallcap or don't mix stocks from one universe to other)

If the question mentions "normalized z-score" or "normalized z-score rank":
- THIS IS EXTREMELY IMPORTANT: Sort results by Normalized Z-Score Rank in ascending order (Rank 1 is best)
- Include ONLY stocks that have this specific rank information
- Present results with lowest rank numbers first (1, 2, 3, etc.)
- Include the stock symbol, company name, and the exact Normalized Z-Score Rank value
- Example format for each stock: "Company Name (SYMBOL) - Normalized Z-Score Rank: X out of Y"

If the question mentions "weighted z-score" or "weighted z-score rank":
- Sort results by Weighted Z-Score Rank in ascending order (Rank 1 is best)
- Include ONLY stocks that have this specific rank information
- Present results with lowest rank numbers first (1, 2, 3, etc.)
- Include the stock symbol, company name, and the exact Weighted Z-Score Rank value

If the question mentions "momentum", "positive momentum", "strong performance" or similar terms:
- Focus on stocks with positive momentum indicators such as:
  * Positive 1-year or 6-month price change (>0%)
  * Strong RSI values (above 50)
  * Good momentum ranks (in top 40% of their category)
  * Positive MACD indicators
  * Price above key moving averages
- Sort results by momentum ranks when available (Rank 1 is best)
- Be explicit about which momentum criteria you're using

If the question is about a specific stock, make sure to include:
- Technical indicators (RSI, MACD, etc.) if available
- Price performance data
- Strength and weakness analysis
- Momentum rankings if available

If you don't know the answer based on the given context, just say you don't have enough information.
Don't make up information that isn't provided in the context.

Context:
{context}

Question: {question}

Helpful Answer (in English only):
"""

In [None]:
# Wrap TEMPLATE in a PromptTemplate, declaring its two placeholders as input variables
prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])

In [None]:
# Set up the base retriever parameters
# Cap at 100 or 10% of chunks, whichever is smaller, but never below 1: with fewer
# than 10 chunks the integer division yields 0, and the retrievers built from max_k
# would then be asked for zero documents.
max_k = max(1, min(100, len(chunks) // 10))

In [None]:
def process_category_query(query, vectorstore):
    """Build a retriever suited to the query and report what was detected in it.

    The query is scanned for a stock-universe keyword and for momentum / ranking
    wording. When a universe is found, the returned retriever is restricted to that
    universe through a metadata filter; otherwise an unfiltered retriever is returned.
    Both use MMR search sized from the module-level `max_k`.

    Args:
        query: the user's question.
        vectorstore: object exposing `as_retriever(search_type=..., search_kwargs=...)`.

    Returns:
        tuple: `(retriever, detected_category, has_momentum_or_rank_filter)` where
        `detected_category` is a folder name such as "smallcap_250" or None, and the
        last item is True when momentum/rank wording or a "top N" / "best N" phrase
        was found.
    """
    query_lower = query.lower()

    # Keyword -> folder name, checked in insertion order with the first hit winning.
    # The bare "nifty" keyword comes LAST: index names such as "Nifty Smallcap 250"
    # or "Nifty Midcap 150" contain it, and checking it first misrouted those
    # queries to nifty_50.
    categories = {
        "nifty 50": "nifty_50",
        "nifty50": "nifty_50",
        "midcap": "midcap_150",
        "mid-cap": "midcap_150",
        "mid cap": "midcap_150",
        "smallcap": "smallcap_250",
        "small-cap": "smallcap_250",
        "small cap": "smallcap_250",
        "microcap": "microcap_250",
        "micro-cap": "microcap_250",
        "micro cap": "microcap_250",
        "nifty": "nifty_50",
    }

    # Plain substring matching (so plurals like "smallcaps" are caught). The original
    # also ran a word-boundary regex first, but its substring fallback accepted every
    # case the regex did, so the regex never changed the outcome.
    detected_category = None
    for keyword, folder_name in categories.items():
        if keyword in query_lower:
            detected_category = folder_name
            print(f"Detected category: {detected_category}")
            break

    # Check for momentum or performance filter
    momentum_filter = False
    momentum_keywords = ["momentum", "positive momentum", "strong momentum", "good performance",
                         "top performing", "top stocks", "best stocks", "positive return"]

    # Check for rank-based queries ("top 10", "best 5", ...)
    rank_based_query = bool(re.search(r'top\s+\d+|best\s+\d+', query_lower))
    rank_keywords = ["rank", "ranking", "ranked", "momentum rank", "z-score rank"]

    for keyword in momentum_keywords:
        if keyword in query_lower:
            momentum_filter = True
            print(f"Detected momentum filter: {keyword}")
            break

    # Rank wording sets the same flag as momentum wording
    for keyword in rank_keywords:
        if keyword in query_lower:
            momentum_filter = True
            print(f"Detected rank filter: {keyword}")
            break

    if detected_category:
        # Use metadata filtering with the vectorstore
        filtered_retriever = vectorstore.as_retriever(
            search_type="mmr",
            search_kwargs={
                "k": max_k,
                "fetch_k": max_k * 3,
                "lambda_mult": 0.6,
                "filter": {
                    # Use multiple metadata fields for redundant filtering
                    "$and": [
                        {"doc_type": detected_category},
                        {"category": detected_category}
                    ]
                }
            }
        )
        return filtered_retriever, detected_category, momentum_filter or rank_based_query

    # If no category detected, return the default retriever
    return vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={
            "k": max_k,
            "fetch_k": max_k * 2,
            "lambda_mult": 0.7,
        }
    ), None, momentum_filter or rank_based_query

In [None]:
def debug_retrieval(query):
    """Test function to check what documents are being retrieved for a query.

    Prints every category keyword found in the query, runs the retrieval through
    `process_category_query` against the module-level `vectorstore`, then prints
    details of the first three documents and the doc_type distribution of the
    first five.

    Args:
        query: the question to test.

    Returns:
        The list of documents returned by the retriever.
    """
    # Get the query keywords
    query_lower = query.lower()

    # Check for category keywords
    categories = {
        "nifty 50": "nifty_50",
        "nifty50": "nifty_50",
        "nifty": "nifty_50",
        "midcap": "midcap_150",
        "mid-cap": "midcap_150",
        "mid cap": "midcap_150",
        "smallcap": "smallcap_250",
        "small-cap": "smallcap_250",
        "small cap": "smallcap_250",
        "microcap": "microcap_250",
        "micro-cap": "microcap_250",
        "micro cap": "microcap_250"
    }

    # Report every keyword that matches (no early exit), for diagnostics
    detected = False
    for keyword, category in categories.items():
        if keyword in query_lower:
            print(f"Found category keyword '{keyword}' -> '{category}'")
            detected = True

    if not detected:
        print("No category keyword detected in query")

    # Test retrieval with the process_category_query.
    # It returns (retriever, category, flag); the original kept the whole tuple and
    # then called .get_relevant_documents on it, which raised AttributeError.
    specific_retriever, _category, _has_filter = process_category_query(query, vectorstore)
    docs = specific_retriever.get_relevant_documents(query)

    # Check document metadata
    print(f"\nRetrieved {len(docs)} documents")
    categories_found = {}

    for i, doc in enumerate(docs[:5]):  # Check first 5 docs
        doc_type = doc.metadata.get('doc_type', 'unknown')
        categories_found[doc_type] = categories_found.get(doc_type, 0) + 1
        if i < 3:  # Show details for first 3 docs
            print(f"\nDocument {i+1}:")
            print(f"  Metadata: {doc.metadata}")
            print(f"  Content (first 100 chars): {doc.page_content[:100]}...")

    print("\nDocument categories distribution:")
    for cat, count in categories_found.items():
        print(f"  - {cat}: {count} documents")

    return docs

# Uncomment and run this to debug specific queries
# debug_retrieval("Show me Nifty 50 stocks")
# debug_retrieval("List midcap stocks with good RSI")
# debug_retrieval("What are the smallcap stocks with strong momentum?")
# debug_retrieval("Show me microcap stocks")

In [None]:
class DynamicCategoryRetriever(BaseRetriever):
    """Retriever that picks a query-specific retriever on every call.

    For each query, ``process_category_query`` is asked for the retriever to
    use (together with the detected category and a momentum/rank flag); the
    documents are then fetched from that retriever and returned unchanged.
    """

    vectorstore: Any = Field(description="Vector store for embeddings")

    class Config:
        """Configuration for this pydantic object."""
        arbitrary_types_allowed = True

    def get_relevant_documents(self, query: str) -> List[Document]:
        """Return the documents produced by the retriever selected for ``query``.

        No filtering or re-ordering happens here; for "top N" / "best N"
        queries with a detected category a diagnostic line is printed, and the
        actual ranking is left to the LLM prompt.
        """
        delegate, category, wants_momentum_or_rank = process_category_query(query, self.vectorstore)
        documents = delegate.get_relevant_documents(query)

        # Phrases such as "top 10" or "best 5" mark a rank-based request.
        is_top_n_query = re.search(r'top\s+\d+|best\s+\d+', query.lower()) is not None

        ranking_requested = wants_momentum_or_rank or is_top_n_query
        if ranking_requested and len(documents) > 0 and is_top_n_query and category:
            # Diagnostic only: the sorting itself is delegated to the LLM.
            print(f"Rank-based query detected for {category}. Retrieved {len(documents)} documents.")

        return documents

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        """Async entry point; simply runs the synchronous lookup inline."""
        return self.get_relevant_documents(query)

In [None]:
# Chat model (Gemini through LangChain) that generates the answers
llm = ChatGoogleGenerativeAI(model=MODEL, temperature=0.7, google_api_key=GOOGLE_API_KEY)

# Conversation memory for the chat. output_key='answer' names the chain output
# to remember; the chain below also returns source documents alongside it.
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')

# Retriever that selects a category-specific retriever per query
retriever = DynamicCategoryRetriever(vectorstore=vectorstore)

# Conversation chain used by chat(). A name assigned at the top level of a
# cell is already a module-level (global) name, so no `global` statement is
# needed here for functions defined in other cells to see it.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": prompt},
    return_source_documents=True
)

In [None]:
# Function to inspect the vectorstore for category distribution
def inspect_vectorstore_categories(vectorstore, limit=1000):
    """Count the documents in the vectorstore per ``doc_type`` metadata value.

    Args:
        vectorstore: Object exposing its underlying collection as
            ``_collection``; the collection must support
            ``get(include=[...], limit=...)``.
        limit: Maximum number of entries to fetch. Defaults to 1000 (the
            value that used to be hard-coded); pass ``None`` to forward no
            limit to the collection.

    Returns:
        dict mapping each doc_type to its document count, in first-seen
        order. Entries without metadata or without a ``doc_type`` key are
        counted under ``'unknown'``.
    """
    collection = vectorstore._collection
    result = collection.get(include=['metadatas'], limit=limit)

    # Guard against a missing metadata list so the loop below never sees None.
    metadatas = result['metadatas'] or []
    categories = {}

    print(f"Inspecting {len(metadatas)} documents in vectorstore")
    if limit is not None and len(metadatas) >= limit:
        # Make truncation visible instead of silently reporting partial counts.
        print(f"Note: reached the limit of {limit} documents; counts may be incomplete")

    # Count documents by doc_type (category)
    for metadata in metadatas:
        # A document stored without metadata can come back as None.
        doc_type = (metadata or {}).get('doc_type', 'unknown')
        categories[doc_type] = categories.get(doc_type, 0) + 1

    print("\nDocument distribution by category:")
    for category, count in categories.items():
        print(f"  - {category}: {count} documents")

    return categories

# Print and keep the per-doc_type document counts of the vectorstore.
# With the default settings at most 1000 entries are inspected.
categories_in_db = inspect_vectorstore_categories(vectorstore)

In [None]:
# # Debug function to inspect metadata in the vectorstore
# def inspect_vectorstore_metadata(vectorstore, n_samples=5):
#     collection = vectorstore._collection
#     result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=100)

#     metadatas = result['metadatas']
#     categories = {}

#     print(f"\n=== Inspecting metadata for {len(metadatas)} documents ===")

#     # Count documents by category
#     for metadata in metadatas:
#         category = metadata.get('category', metadata.get('doc_type', 'unknown'))
#         categories[category] = categories.get(category, 0) + 1

#     print("\nDocuments by category:")
#     for category, count in categories.items():
#         print(f"  - {category}: {count} documents")

#     # Show sample metadata entries
#     print(f"\nSample metadata entries ({n_samples}):")
#     for i, metadata in enumerate(metadatas[:n_samples]):
#         print(f"\nDocument {i+1}:")
#         for key, value in metadata.items():
#             print(f"  {key}: {value}")

#     return categories

# # Run the inspection after creating the vectorstore
# categories = inspect_vectorstore_metadata(vectorstore)

In [None]:
def chat(message, history):
    """Gradio chat callback: add ranking hints to the query and run the chain.

    Sorting/formatting instructions are appended to the message depending on
    phrases found in it (case-insensitive substring matching). Precedence:

    1. "normalized z-score", or a generic rank request ("rank", "top stocks",
       "best stocks") that names no specific metric -> Normalized Z-Score Rank
    2. "weighted z-score" -> Weighted Z-Score Rank
    3. explicit "momentum rank", or "top N" / "best N" -> momentum rank

    Args:
        message: The user's message text.
        history: Conversation history supplied by Gradio. Unused here; the
            message alone is passed to ``conversation_chain``.

    Returns:
        The chain's answer text, or an apology string containing the error
        message if anything goes wrong.
    """
    try:
        # Lower-case once; every keyword check below is a substring test on it.
        query = message.lower()

        def mentions(*phrases):
            """True if any of the phrases occurs in the lower-cased query."""
            return any(phrase in query for phrase in phrases)

        # Explicitly named ranking metrics. The "... rank" variants of each
        # phrase are covered by the shorter phrases they contain.
        has_normalized_z = mentions("normalized z-score", "normalized z score")
        has_weighted_z = mentions("weighted z-score", "weighted z score")
        has_momentum_rank = "momentum rank" in query

        # Generic rank wording falls back to the normalized z-score rank, but
        # only when no metric was named. "momentum rank" has to be excluded
        # here: it contains "rank" and would otherwise always be answered with
        # normalized z-score instructions, never reaching the momentum branch.
        has_generic_rank = (
            mentions("rank", "top stocks", "best stocks")
            and not (has_normalized_z or has_weighted_z or has_momentum_rank)
        )

        # Momentum wording ("momentum" also covers "positive/strong momentum"
        # and "momentum rank").
        has_momentum_filter = mentions(
            "momentum", "good performance", "top performing",
            "top stocks", "best stocks", "positive return",
        )

        # Rank-based queries of the form "top 10 ..." / "best 5 ..."
        rank_based_query = bool(re.search(r'top\s+\d+|best\s+\d+', query))

        if has_normalized_z or has_generic_rank:
            print("Detected normalized z-score or generic rank query, using normalized z-score rank...")
            message = message + " (Please sort results by Normalized Z-Score Rank in ascending order, with Rank 1 being the best. Format each result as 'Company Name (SYMBOL) - Normalized Z-Score Rank: X out of Y')"

        elif has_weighted_z:
            print("Detected weighted z-score query, adding special instructions...")
            message = message + " (Please sort results by Weighted Z-Score Rank in ascending order, with Rank 1 being the best. Format each result as 'Company Name (SYMBOL) - Weighted Z-Score Rank: X out of Y')"

        elif has_momentum_filter or rank_based_query:
            print("Detected momentum or rank-based query, adding context...")

            # Any other wording containing "rank" was already handled above,
            # so only "top N" and explicit "momentum rank" requests get the
            # momentum-rank sorting instruction.
            if rank_based_query or has_momentum_rank:
                print("Adding rank sorting instructions...")
                message = message + " (Please sort by momentum rank in ascending order, with Rank 1 being the best)"

        result = conversation_chain.invoke({"question": message})
        return result["answer"]
    except Exception as e:
        # Best effort: report the failure in the chat instead of crashing the UI.
        print(f"Error in chat function: {e}")
        return f"Sorry, an error occurred: {str(e)}"

In [None]:
# Define custom CSS for better spacing and layout

# css = """
# .gradio-container {max-width: 1200px !important; margin-left: auto !important; margin-right: auto !important;}
# .plot-container {width: 100% !important; display: flex !important; justify-content: center !important;}
# .visualization-row {display: flex !important; justify-content: space-between !important; width: 100% !important;}
# .visualization-column {flex: 1 !important; padding: 0 10px !important;}
# """

# Custom CSS for the Gradio app. The class names used here are attached to the
# layout through `elem_classes` when the Blocks interface is built.
css = """
.gradio-container {
    max-width: 100% !important;
    width: 100% !important;
    margin: 0 !important;
    padding: 0 !important;
    min-height: 100vh !important;
}

/* Main row container */
.app-container {
    display: flex;
    min-height: 100vh;
}

/* Chat container styling */
.chat-container {
    display: flex;
    flex-direction: column;
    min-height: 100vh;
}

/* Target the chatbot message container */
.chat-container > .prose {
    flex: 1;
}

/* Make the message container scrollable */
.message-wrap, .chatbot-message-container {
    height: calc(100vh - 180px) !important;
    max-height: none !important;
    overflow-y: auto !important;
}

/* Visualization container styling */
.plots-container {
    display: flex;
    flex-direction: column;
    min-height: 100vh;
}

.plot-3d-container, .plot-2d-container {
    height: 50vh;
    width: 100%;
}

/* Make plot elements fill their containers */
.plot-container {
    width: 100% !important;
    height: 100% !important;
}

/* Responsive layout for smaller screens */
@media (max-width: 992px) {
    .app-container {
        flex-direction: column;
    }

    .chat-container, .plots-container {
        min-height: 50vh;
    }

    .message-wrap, .chatbot-message-container {
        height: calc(50vh - 120px) !important;
    }
}
"""

In [None]:
# Create a Blocks interface with visualizations
# Layout: a single row with the chat on the left and two stacked plots on the
# right. The elem_classes values are the hooks targeted by the selectors in
# the `css` string passed to gr.Blocks.
with gr.Blocks(css=css, theme="soft") as view:
    with gr.Row(elem_classes="app-container"):
        # Left side - Chat interface
        with gr.Column(elem_classes="chat-container"):
            gr.HTML("<h2 style='text-align:center;'>Stock Market Analysis By AJ14314</h2>")
            # chat() is the callback invoked for each user message.
            # NOTE(review): theme="soft" is also set on the enclosing Blocks;
            # presumably redundant on the nested ChatInterface - confirm.
            chat_interface = gr.ChatInterface(
                fn=chat,
                examples=[
                    "Show me all Nifty 50 stocks",
                    "List the top 10 performing midcap stocks",
                    "What are the smallcap stocks with positive momentum?",
                    "Show microcap stocks with highest ranks",
                    "Compare Reliance and TCS performance"
                ],
                description="Ask questions about Indian stocks to get insights based on technical analysis reports.",
                theme="soft",
                autofocus=True,
                fill_height=True
            )

        # Right side - Plots container with 3D on top and 2D below
        with gr.Column(elem_classes="plots-container"):
            # 3D Plot on top
            with gr.Column(elem_classes="plot-3d-container"):
                gr.HTML("<h3 style='text-align:center;'>3D Vector Space Visualization</h3>")
                # create_3d_visualization() is called once, while the UI is being built.
                plot_3d = gr.Plot(create_3d_visualization(), elem_id="plot-3d", container=True)

            # 2D Plot below
            with gr.Column(elem_classes="plot-2d-container"):
                gr.HTML("<h3 style='text-align:center;'>2D Vector Space Visualization</h3>")
                # create_2d_visualization() is likewise called once at build time.
                plot_2d = gr.Plot(create_2d_visualization(), elem_id="plot-2d", container=True)

In [None]:
# Start the Gradio app; inbrowser=True asks Gradio to open it in a browser tab.
view.launch(inbrowser=True)