In [1]:
import pandas as pd
import numpy as np

from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
google_api_key = "AIzaSyBGIQOXNlkmsuvXbL-8m708f5bmD6vKiKc"
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=google_api_key, temperature=0.7 )

In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


In [None]:
# preprocessing file (py)

In [None]:
dup_cols = [col for col in final_data.columns if col.endswith('_x') or col.endswith('_y')]
print(dup_cols)

## New merge

In [2]:
import os
import sys
import re
import logging
import pandas as pd
from datetime import datetime

# ======================================================
#          LOGGING SETUP
# ======================================================
def setup_logging():
    if not os.path.exists('logs'):
        os.makedirs('logs')
    
    log_filename = f"logs/data_processor_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
    
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_filename, encoding='utf-8'),
            logging.StreamHandler(sys.stdout)
        ]
    )
    return logging.getLogger("DataProcessor")

logger = setup_logging()


# ======================================================
#          HELPER FUNCTIONS
# ======================================================
def extract_fy_year(filename):
    """Extract FY year from filename automatically"""
    match = re.search(r"FY(\d{2})", filename.upper())
    if match:
        return "20" + match.group(1)
    return None


def group_files_by_fy(file_list):
    """
    Group files by fiscal year.
    Returns a dict {FY_year: [list of files]}
    """
    fy_dict = {}
    for file in file_list:
        fy = extract_fy_year(file)
        if fy:
            fy_dict.setdefault(fy, []).append(file)
    return fy_dict


# ======================================================
#          LOAD FILES FROM FOLDERS
# ======================================================
def load_files_from_subfolders(main_folder):
    """
    Reads all Excel/CSV files from three subfolders:
    - Business Numbers
    - Scores
    - Partner MIS
    Returns a dictionary with lists of file paths.
    """

    subfolders = ["Business Numbers", "Scores", "Partner MIS"]
    file_dict = {}

    for sub in subfolders:
        folder_path = os.path.join(main_folder, sub)

        if not os.path.exists(folder_path):
            logger.error(f" Folder does not exist: {folder_path}")
            file_dict[sub] = []
            continue

        logger.info(f" Scanning files from: {folder_path}")

        file_list = []
        for filename in os.listdir(folder_path):
            full_path = os.path.join(folder_path, filename)
            if filename.lower().endswith((".xlsx", ".xls", ".csv")):
                file_list.append(full_path)
                logger.info(f" Found: {filename}")
            else:
                logger.info(f" Skipped (not Excel/CSV): {filename}")

        file_dict[sub] = file_list

    return file_dict


# ======================================================
#          DATA PROCESSOR CLASS
# ======================================================
class DataProcessor:

    def __init__(self):
        self.logger = logging.getLogger("DataProcessor")
        self.logger.info("DataProcessor initialized")

    # --------------------------------------------------
    def load_business_numbers(self, file_paths):
        """Load & preprocess BN files grouped by FY"""
        self.logger.info("Loading Business Numbers")
        fy_groups = group_files_by_fy(file_paths)
        all_bn = []

        for fy, files in fy_groups.items():
            self.logger.info(f"Processing FY{fy} files: {files}")
            fy_dfs = []
            for file in files:
                try:
                    df = pd.read_excel(file)
                    df.columns = df.iloc[0]
                    df = df[1:].reset_index(drop=True)
                    fy_dfs.append(df)
                except Exception as e:
                    self.logger.error(f"Error in BN file {file}: {e}", exc_info=True)
            if fy_dfs:
                fy_total = pd.concat(fy_dfs, axis=0)
                fy_total["FY_Year"] = fy
                all_bn.append(fy_total)

        if all_bn:
            return pd.concat(all_bn, ignore_index=True)
        else:
            return pd.DataFrame()

    # --------------------------------------------------
    def load_scorecards(self, file_paths):
        """Load & preprocess Scorecard files grouped by FY"""
        self.logger.info("Loading Scorecards")
        fy_groups = group_files_by_fy(file_paths)
        all_sc = []

        for fy, files in fy_groups.items():
            self.logger.info(f"Processing FY{fy} Scorecard files: {files}")
            fy_dfs = []
            for file in files:
                try:
                    df = pd.read_excel(file)
                    df.columns = df.iloc[0]
                    df = df[1:].reset_index(drop=True)

                    # Fill forward top-level headers
                    headers = df.columns.to_list()
                    filled = []
                    last = None
                    for h in headers:
                        if pd.notna(h):
                            last = h
                        filled.append(last)
                    df.columns = filled

                    # Sub headers
                    sub = df.iloc[0].fillna('')
                    df = df[1:]
                    df.columns = [
                        f"{t}_{s}" if s != "" else t
                        for t, s in zip(df.columns, sub)
                    ]
                    fy_dfs.append(df)
                except Exception as e:
                    self.logger.error(f"Error in Scorecard file {file}: {e}", exc_info=True)

            if fy_dfs:
                fy_total = pd.concat(fy_dfs, axis=0)
                fy_total["FY_Year"] = fy
                all_sc.append(fy_total)

        if all_sc:
            return pd.concat(all_sc, ignore_index=True)
        else:
            return pd.DataFrame()

    # --------------------------------------------------
    def load_mis_data(self, file_paths):
        """Load & preprocess MIS files grouped by FY"""
        self.logger.info("Loading MIS data")
        fy_groups = group_files_by_fy(file_paths)
        all_mis = []

        for fy, files in fy_groups.items():
            self.logger.info(f"Processing FY{fy} MIS files: {files}")
            fy_dfs = []
            for file in files:
                try:
                    df = pd.read_excel(file)
                    df.columns = df.iloc[0]
                    df = df[1:].reset_index(drop=True)
                    fy_dfs.append(df)
                except Exception as e:
                    self.logger.error(f"Error in MIS file {file}: {e}", exc_info=True)

            if fy_dfs:
                fy_total = pd.concat(fy_dfs, axis=0)
                fy_total["FY_Year"] = fy
                all_mis.append(fy_total)

        if all_mis:
            return pd.concat(all_mis, ignore_index=True)
        else:
            return pd.DataFrame()

    # --------------------------------------------------
    def merge_all_data(self, bn_df, sc_df, mis_df):
        """Final merging"""
        self.logger.info("Merging BN + SC")
        merged_bn_sc = pd.merge(
            bn_df,
            sc_df,
            left_on=["Partner Code", "Partner Name", "FY_Year"],
            right_on=["Broker Code", "Partner Name", "FY_Year"],
            how="left",
            suffixes=("", "_DROP")
        )

        drop_cols = [c for c in merged_bn_sc.columns if "_DROP" in c]
        merged_bn_sc.drop(columns=drop_cols, inplace=True)

        self.logger.info("Merging with MIS")
        final_df = pd.merge(
            merged_bn_sc,
            mis_df,
            on=["Broker Code", "FY_Year"],
            how="left"
        )

        self.logger.info(f"Final dataset shape → {final_df.shape}")
        return final_df


# ======================================================
#                       MAIN
# ======================================================
def main():
    logger.info("=========== START PIPELINE ===========")

    main_folder = r"C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa"  # <-- Update path

    # Step 1: Auto-load files from folders
    file_dict = load_files_from_subfolders(main_folder)

    # Step 2: Create processor
    processor = DataProcessor()

    # Step 3: Load each section
    bn_df = processor.load_business_numbers(file_dict.get("Business Numbers", []))
    print("\n Business Numbers Sample")
    print(bn_df.head())
    print("Shape:", bn_df.shape)
    sc_df = processor.load_scorecards(file_dict.get("Scores", []))
    print("\n Scorecards Sample")
    print(sc_df.head())
    print("Shape:", sc_df.shape)
    mis_df = processor.load_mis_data(file_dict.get("Partner MIS", []))
    print("\n MIS Sample ")
    print(mis_df.head())
    print("Shape:", mis_df.shape)

    # Step 4: Merge
    final_df = processor.merge_all_data(bn_df, sc_df, mis_df)

    logger.info("=========== PIPELINE FINISHED ===========")
    print("\nFinal dataframe shape:", final_df.shape)

    return final_df


if __name__ == "__main__":
    main()


2025-12-15 15:24:06,232 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Business Numbers
2025-12-15 15:24:06,232 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Business Numbers.xls
2025-12-15 15:24:06,242 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Business Numbers.xls
2025-12-15 15:24:06,244 - DataProcessor - INFO -  Found: FY25 - YTD - Partner Score Card - Business Numbers.xls
2025-12-15 15:24:06,244 - DataProcessor - INFO -  Found: FY25-Q2 - YTD - Partner Score Card - Business Numbers.xls
2025-12-15 15:24:06,244 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Scores
2025-12-15 15:24:06,250 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Scores.xls
2025-12-15 15:24:06,250 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Scores.xls
2025-12-15 15:24:06,250 - DataProcessor - INFO -  Found: FY25 - YTD - P

In [4]:
final_df = main()

2025-12-15 15:30:19,793 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Business Numbers
2025-12-15 15:30:19,798 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Business Numbers.xls
2025-12-15 15:30:19,798 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Business Numbers.xls
2025-12-15 15:30:19,800 - DataProcessor - INFO -  Found: FY25 - YTD - Partner Score Card - Business Numbers.xls
2025-12-15 15:30:19,801 - DataProcessor - INFO -  Found: FY25-Q2 - YTD - Partner Score Card - Business Numbers.xls
2025-12-15 15:30:19,801 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Scores
2025-12-15 15:30:19,801 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Scores.xls
2025-12-15 15:30:19,801 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Scores.xls
2025-12-15 15:30:19,801 - DataProcessor - INFO -  Found: FY25 - YTD - P

In [3]:
final_df['Partner_Name'].nunique()

NameError: name 'final_df' is not defined

In [41]:
bn_df.columns.tolist()

['Sr No.',
 'Partner Code',
 'Partner Name',
 'Center',
 'Category',
 'Relationship Handler',
 'Investment Net Sales Target',
 'Investment Net Sales Achievement',
 'Investment Net Sales % Achievement',
 'Equity Sales',
 'MIP Sales',
 'Gold Sales',
 'Sales in Physical Assets ',
 'Sales in Direct Equity ',
 'FD + Bond (Primary Market) Sales',
 'Secondary Market Bond Sales',
 'Net Sales Through Realty',
 'Net NJ PMS Sales',
 'Net Non-NJ PMS Sales',
 'Net Sales through MARS',
 'SIP Sales Target',
 'SIP Sales Achievement',
 'SIP Sales % Achievement',
 'Fresh Gross SIP Sales',
 'SIP Closure / Termination',
 'FY_Year']

In [39]:
# After loading individual datasets
print("=== Business Numbers Sample ===")
print(bn_df.head())

print("\n=== Scorecards Sample ===")
print(sc_df.head())

print("\n=== MIS Sample ===")
print(mis_df.head())


=== Business Numbers Sample ===
0 Sr No. Partner Code         Partner Name            Center Category  \
0      1        23676     ALOKE CHATTERJEE  24 SOUTH PARGANA    NON D   
1      2        20361  ARINDAM CHAKRAVARTI  24 SOUTH PARGANA    NON D   
2      3        24695    CHINTU KUMAR SHAW  24 SOUTH PARGANA    NON D   
3      4        24040       GOURAB PURKAIT  24 SOUTH PARGANA    NON D   
4      5        25776    RAJ KISHORE BARIK  24 SOUTH PARGANA    NON D   

0 Relationship Handler Investment Net Sales Target  \
0        SUBRATA MAITY                     1500000   
1        SUBRATA MAITY                     3360000   
2        SUBRATA MAITY                     1200000   
3        SUBRATA MAITY                     1500000   
4        SUBRATA MAITY                      600000   

0 Investment Net Sales Achievement Investment Net Sales % Achievement  \
0                        104696.25                               6.98   
1                       1458896.25                        

NameError: name 'sc_df' is not defined

In [6]:
final_df = main()

2025-12-03 10:57:16,972 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Business Numbers
2025-12-03 10:57:16,974 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 10:57:16,974 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 10:57:16,974 - DataProcessor - INFO -  Found: FY25 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 10:57:16,974 - DataProcessor - INFO -  Found: FY25-Q2 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 10:57:16,974 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Scores
2025-12-03 10:57:16,974 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Scores.xls
2025-12-03 10:57:16,980 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Scores.xls
2025-12-03 10:57:16,980 - DataProcessor - INFO -  Found: FY25 - YTD - P

In [8]:
main_folder = r"C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa"  
file_dict = load_files_from_subfolders(main_folder)


2025-12-03 11:03:07,354 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Business Numbers
2025-12-03 11:03:07,354 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 11:03:07,354 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 11:03:07,356 - DataProcessor - INFO -  Found: FY25 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 11:03:07,356 - DataProcessor - INFO -  Found: FY25-Q2 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 11:03:07,356 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Scores
2025-12-03 11:03:07,360 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Scores.xls
2025-12-03 11:03:07,360 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Scores.xls
2025-12-03 11:03:07,360 - DataProcessor - INFO -  Found: FY25 - YTD - P

In [9]:
# Load all files into file_dict
file_dict = load_files_from_subfolders(main_folder)
processor = DataProcessor()

# Business Numbers
bn_df = processor.load_business_numbers(file_dict.get("Business Numbers", []))
print(bn_df['FY_Year'].value_counts())


2025-12-03 11:03:09,080 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Business Numbers
2025-12-03 11:03:09,081 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 11:03:09,082 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 11:03:09,083 - DataProcessor - INFO -  Found: FY25 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 11:03:09,086 - DataProcessor - INFO -  Found: FY25-Q2 - YTD - Partner Score Card - Business Numbers.xls
2025-12-03 11:03:09,087 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Scores
2025-12-03 11:03:09,087 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Scores.xls
2025-12-03 11:03:09,087 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Scores.xls
2025-12-03 11:03:09,089 - DataProcessor - INFO -  Found: FY25 - YTD - P

In [10]:
final_df.head()

Unnamed: 0,Sr No.,Partner Code,Partner Name_x,Center_x,Category_x,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name_x,Doer Type_x,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement_x,ZM,SRM,RM,BM,Partner Name_y,Category_y,Doer Name_y,Doer Type_y,Center_y,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement_y,Status,Total AUM FY 24-25 Q4 YTD,Equity AUM FY 24-25 Q4 YTD,LIVE SIP FY 24-25 Q4 YTD,Total Net Sales\nFY 24-25 Q4 YTD,Equity Net Sales\nFY 24-25 Q4 YTD,Net SIP\nFY 24-25 Q4 YTD,MARS AUM FY 24-25 Q4 YTD,MARS Net Sales FY 24-25 Q4 YTD,PMS AUM FY 24-25 Q4 YTD,Net Sales FY 24-25 Q4 YTD,Clients Acquired FY 24-25 Q4 YTD,Live Accounts (Non-D) FY 24-25 Q4 YTD,Saturday School (YTD) FY 24-25 Q4 YTD,Investment\nSaturday School (YTD) FY 24-25 Q4 YTD,Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD,Total Group FY 24-25 Q4 YTD,Group Covered FY 24-25 Q4 YTD,Total Reviews\nFY 24-25 Q4 YTD,Non-NJ AUM\nFY 24-25 Q4 YTD,Amount FY 24-25 Q4 YTD,Flexicap Target\nFY 24-25 Q4 YTD,Flexicap Ach\nFY 24-25 Q4 YTD,AMC NS Target\nFY 24-25 Q4 YTD,AMC NS Ach\nFY 24-25 Q4 YTD,Target Qty (FY 24-25 Q4 YTD)\n(in Cr),Order Qty\n(FY 24-25 Q4 YTD)\n(in Cr),MARS TO EQ AUM
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023,3,24695,SUBRATA MAITY,Fundz Express,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,4,24040,GOURAB PURKAIT,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,26998.7,1.8,26998.7,0,0,0,0,0,0,0,0,0,0,33750,10499.49,31.11,10499.49,0.0,2023,4,24040,SUBRATA MAITY,Fundz Express,1500000,26998.7,1.8,,,,33750,10499.49,31.11,20,3.0,15.0,3.11,0,0.0,6999.66,2.33,14.59,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,5,25776,RAJ KISHORE BARIK,24 SOUTH PARGANA,NON D,SUBRATA MAITY,600000,496499.92,82.75,1499.92,0,0,0,0,0,0,0,0,0,495000,13500,1499.92,11.11,1499.92,0.0,2023,5,25776,SUBRATA MAITY,Fundz Express,600000,496499.92,82.75,,,,13500,1499.92,11.11,8,3.0,37.5,0.0,0,,0.0,0.0,40.82,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


###NEW

In [11]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
gemini_api_key = "AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0"
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str, max_words: int = 200) -> str:
        try:
            response = self.model.generate_content(prompt)
            words = response.text.strip().split()
            return " ".join(words[:max_words])
        except Exception as e:
            return f"Error generating insight: {e}"

generator = DeepInsightGenerator()

# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
thresholds = [25, 50, 75, 80, 90]

# ONLY TOP 1
top_bms = 1
top_partners = 1
top_n = 1  

threshold_pct_change = 50  

# ORIGINAL PROMPTS
kpi_prompts = {
    'Equity Sales': "Analyze Equity Sales performance.",
    'SIP Sales Achievement': "Analyze SIP Sales Achievement performance.",
    'Net Sales through MARS': "Analyze Net Sales through MARS performance.",
    'Investment Net Sales Achievement': "Analyze Investment Net Sales Achievement."
}

# ================= HELPER FUNCTIONS =================
def partner_concentration(df, kpi):
    df_sorted = df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100 if total_kpi != 0 else 0
    total_partners = len(df_sorted)
    conc_summary = {}
    for t in thresholds:
        if not df_sorted.empty:
            num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
            conc_summary[f'Partners for {t}%'] = num_partners
            conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
    return conc_summary, total_partners, df_sorted

def generate_partner_concentration_insight(df, kpi):
    conc_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        conc_summary, total_partners, df_sorted = partner_concentration(bm_df, kpi)
        text = f"Branch Manager: {bm}\nKPI: {kpi}\nTotal Partners: {total_partners}\n"
        for t in thresholds:
            text += f"- Top {conc_summary.get(f'Partners for {t}%', 0)} partners ({conc_summary.get(f'% of Total Partners for {t}%',0)}%) contribute {t}% of total {kpi}.\n"
        topX = df_sorted.head(top_partners)[['Partner Name_x', kpi, 'Cumulative KPI %']]
        text += "\nTop Partner:\n"
        for _, row in topX.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Generate insights ONLY from the data provided — do not give generic advices.
Write insights in this exact structure:
1. Concentration level 
2. Risk/impact 
3. Action recommendation Limit to 200 words
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        conc_texts[bm] = text + "\nLLM Insight:\n" + insight
    return conc_texts

def generate_leaders_laggers_insight(df, kpi):
    ll_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        perf = bm_df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
        total_perf = perf[kpi].sum() if perf[kpi].sum()!=0 else 1
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi]/total_perf*100,2)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi]/total_perf*100,2)
        text = f"Branch Manager: {bm}\nKPI: {kpi}\nTop Leader:\n"
        for _, row in leaders.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        text += "\nBottom Lagger:\n"
        for _, row in laggers.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Give insights:
1. Dependency on leader
2. Fix for lagger
3. Impact on BM
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        ll_texts[bm] = text + "\nLLM Insight:\n" + insight
    return ll_texts

def detect_drastic_changes(df):
    change_records = []
    df_sorted = df.sort_values(['BM', 'Partner Name_x', 'FY_Year'])
    for (bm, partner), group in df_sorted.groupby(['BM','Partner Name_x']):
        group = group.sort_values('FY_Year')
        for i in range(1,len(group)):
            prev, curr = group.iloc[i-1], group.iloc[i]
            for kpi in kpi_list:
                prev_val, curr_val = prev[kpi], curr[kpi]
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val==0: continue
                pct_change = (curr_val-prev_val)/abs(prev_val)*100
                if abs(pct_change)>=threshold_pct_change:
                    change_records.append({
                        'BM': bm, 'Partner Name_x': partner, 'KPI': kpi,
                        'Year From': prev['FY_Year'], 'Year To': curr['FY_Year'],
                        'Previous Value': round(prev_val,2), 'Current Value': round(curr_val,2),
                        '% Change': round(pct_change,2)
                    })
    return pd.DataFrame(change_records)

def generate_drastic_change_insight(df):
    change_df = detect_drastic_changes(df)
    if change_df.empty:
        return {"All":"No significant changes detected."}
    bm_insights = {}
    for bm, group in change_df.groupby('BM'):
        combined = f"Branch Manager: {bm}\n"
        top_partner = (
            group.groupby('Partner Name_x')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        for partner in top_partner:
            partner_data = group[group['Partner Name_x']==partner]
            prompt = f"""
Data:
{partner_data}
Explain:
Explain:
1. Largest KPI Change – Identify the KPI and partner with the biggest positive/negative change and quantify it.
2. Reason & Risk – Explain the reason based on data patterns (e.g., low base effect, sudden spike/drop, big-ticket inflow/outflow) and the risk (volatility, dependency, sustainability).
3. BM Action – Give MF-sales-specific actions (partner engagement, review flows, strengthen continuity, activate mid-tier partners). Avoid generic advice.
Limit to 200 words.
"""
            insight = generator.generate_insight(prompt)
            combined += f"\nPartner: {partner}\n{insight}\n" + ("-"*80) + "\n"
        bm_insights[bm] = combined
    return bm_insights

def generate_focus_area_insight(df):
    focus_records = []
    for (bm, partner), group in df.groupby(['BM','Partner Name_x']):
        partner_row = group.iloc[-1]
        gaps = {}
        for kpi in kpi_list:
            target_cols = [col for col in df.columns if 'Target' in col and kpi.split()[0] in col]
            target_val = partner_row[target_cols[0]] if target_cols else np.nan
            gap_pct = ((target_val - partner_row[kpi])/target_val*100 if target_val not in [0, None, np.nan] else 0)
            gaps[kpi] = round(gap_pct,2)
        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x:x[1], reverse=True)
        focus_records.append({'BM':bm,'Partner Name_x':partner,'Focus KPIs':sorted_gaps,'Max Gap':max_gap})
    focus_df = pd.DataFrame(focus_records)
    top_bm = focus_df.groupby('BM')['Max Gap'].max().sort_values(ascending=False).head(top_bms).index
    filtered = focus_df[focus_df['BM'].isin(top_bm)]
    top_data = (
        filtered.groupby('BM',group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    bm_insights = {}
    for bm, group in top_data.groupby('BM'):
        bm_insights[bm] = {}
        for _, row in group.iterrows():
            partner = row['Partner Name_x']
            focus_kpis = row['Focus KPIs']
            prompt = f"""
Data:
{focus_kpis}
Explain:
1. KPIs to fix
2. Improvement required
3. BM action plan
Limit to 200 words.
"""
            insight = generator.generate_insight(prompt)
            bm_insights[bm][partner] = insight
    return bm_insights


# ========== MAIN EXECUTION (TOP 1 ONLY) ==========
final_insights = {}

# TOP 1 ZM
top_zms = final_df.groupby('ZM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index

for zm in top_zms:
    zm_group = final_df[final_df['ZM'] == zm]
    final_insights[zm] = {}

    # TOP 1 BM under this ZM
    top_bm = zm_group.groupby('BM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index
    bm_df = zm_group[zm_group['BM'].isin(top_bm)]

    final_insights[zm]['ZM_Overview'] = {}
    for kpi in kpi_list:
        final_insights[zm]['ZM_Overview'][kpi] = {
            'Partner Concentration': generate_partner_concentration_insight(bm_df, kpi),
            'Leaders & Laggers': generate_leaders_laggers_insight(bm_df, kpi),
            'Drastic Change': generate_drastic_change_insight(bm_df),
            'Areas to Focus': generate_focus_area_insight(bm_df)
        }

    # BM LEVEL
    for bm in top_bm:
        bm_group = zm_group[zm_group['BM'] == bm]
        final_insights[zm][bm] = {}

        for kpi in kpi_list:
            final_insights[zm][bm][kpi] = {
                'Partner Concentration': generate_partner_concentration_insight(bm_group, kpi),
                'Leaders & Laggers': generate_leaders_laggers_insight(bm_group, kpi),
                'Drastic Change': generate_drastic_change_insight(bm_group),
                'Areas to Focus': generate_focus_area_insight(bm_group)
            }

        # TOP 1 RH
        top_rh = (
            bm_group.groupby('Relationship Handler')[kpi_list]
            .sum().sum(axis=1)
            .sort_values(ascending=False)
            .head(1).index
        )

        for rh in top_rh:
            rh_group = bm_group[bm_group['Relationship Handler'] == rh]
            final_insights[zm][bm][rh] = {}
            for kpi in kpi_list:
                final_insights[zm][bm][rh][kpi] = {
                    'Partner Concentration': generate_partner_concentration_insight(rh_group, kpi),
                    'Leaders & Laggers': generate_leaders_laggers_insight(rh_group, kpi),
                    'Drastic Change': generate_drastic_change_insight(rh_group),
                    'Areas to Focus': generate_focus_area_insight(rh_group)
                }


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


In [12]:
import json

print(json.dumps(final_insights, indent=4))
def pretty_print_insights(insights, level=0):
    indent = "    " * level
    for key, value in insights.items():
        if isinstance(value, dict):
            print(f"\n{indent}{key}:")
            pretty_print_insights(value, level + 1)
        else:
            print(f"{indent}- {key}: {value}")  

pretty_print_insights(final_insights)


{
    "SARFARAZ ABDULLA PATEL": {
        "ZM_Overview": {
            "Equity Sales": {
                "Partner Concentration": {
                    "AJINKYA BHIMRAO GURAV": "Branch Manager: AJINKYA BHIMRAO GURAV\nKPI: Equity Sales\nTotal Partners: 396\n- Top 5 partners (1.26%) contribute 25% of total Equity Sales.\n- Top 13 partners (3.28%) contribute 50% of total Equity Sales.\n- Top 38 partners (9.6%) contribute 75% of total Equity Sales.\n- Top 47 partners (11.87%) contribute 80% of total Equity Sales.\n- Top 76 partners (19.19%) contribute 90% of total Equity Sales.\n\nTop Partner:\n- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)\n\nLLM Insight:\nHere's an analysis of the equity sales performance based solely on the provided data: 1. **Concentration Level:** Equity sales are heavily concentrated. A small percentage of partners generate a disproportionately large share of total sales. The top 1.26% of partners drive 25% of sales, and 19.19% drive 90%. 2. **Top Partner & Over-Depende

In [14]:
def pretty_print_insights(insights, level=0):
    indent = "    " * level

    if isinstance(insights, dict):
        for key, value in insights.items():
            print(f"\n{indent}{key}:")
            pretty_print_insights(value, level + 1)

    elif isinstance(insights, list):
        for item in insights:
            pretty_print_insights(item, level)

    else:
        # For long LLM text, print cleanly with indentation
        lines = str(insights).split("\n")
        for line in lines:
            print(f"{indent}- {line.strip()}")


In [15]:
pretty_print_insights(final_insights)


SARFARAZ ABDULLA PATEL:

    ZM_Overview:

        Equity Sales:

            Partner Concentration:

                AJINKYA BHIMRAO GURAV:
                    - Branch Manager: AJINKYA BHIMRAO GURAV
                    - KPI: Equity Sales
                    - Total Partners: 396
                    - - Top 5 partners (1.26%) contribute 25% of total Equity Sales.
                    - - Top 13 partners (3.28%) contribute 50% of total Equity Sales.
                    - - Top 38 partners (9.6%) contribute 75% of total Equity Sales.
                    - - Top 47 partners (11.87%) contribute 80% of total Equity Sales.
                    - - Top 76 partners (19.19%) contribute 90% of total Equity Sales.
                    - 
                    - Top Partner:
                    - - ARVIND S LATKAR: 1,103,469,217.74 (8.29%)
                    - 
                    - LLM Insight:
                    - Here's an analysis of the equity sales performance based solely on the provided da

In [44]:
final_df.columns.tolist()

['Sr No.',
 'Partner Code',
 'Partner Name_x',
 'Center_x',
 'Category_x',
 'Relationship Handler',
 'Investment Net Sales Target',
 'Investment Net Sales Achievement',
 'Investment Net Sales % Achievement',
 'Equity Sales',
 'MIP Sales',
 'Gold Sales',
 'Sales in Physical Assets ',
 'Sales in Direct Equity ',
 'FD + Bond (Primary Market) Sales',
 'Secondary Market Bond Sales',
 'Net Sales Through Realty',
 'Net NJ PMS Sales',
 'Net Non-NJ PMS Sales',
 'Net Sales through MARS',
 'SIP Sales Target',
 'SIP Sales Achievement',
 'SIP Sales % Achievement',
 'Fresh Gross SIP Sales',
 'SIP Closure / Termination',
 'FY_Year',
 'Sr No',
 'Broker Code',
 'Doer Name_x',
 'Doer Type_x',
 'Equity Net Sales_Target',
 'Equity Net Sales_Achievement',
 'Equity Net Sales_% Achievement',
 'Insurance_Target',
 'Insurance_Achievement',
 'Insurance_% Achievement',
 'SIP Sales_Target',
 'SIP Sales_Achievement',
 'SIP Sales_% Achievement',
 'Client Acquisition_Target',
 'Client Acquisition_Achievement',
 'Cli

In [43]:
bn_df.columns.tolist()

['Sr No.',
 'Partner Code',
 'Partner Name',
 'Center',
 'Category',
 'Relationship Handler',
 'Investment Net Sales Target',
 'Investment Net Sales Achievement',
 'Investment Net Sales % Achievement',
 'Equity Sales',
 'MIP Sales',
 'Gold Sales',
 'Sales in Physical Assets ',
 'Sales in Direct Equity ',
 'FD + Bond (Primary Market) Sales',
 'Secondary Market Bond Sales',
 'Net Sales Through Realty',
 'Net NJ PMS Sales',
 'Net Non-NJ PMS Sales',
 'Net Sales through MARS',
 'SIP Sales Target',
 'SIP Sales Achievement',
 'SIP Sales % Achievement',
 'Fresh Gross SIP Sales',
 'SIP Closure / Termination',
 'FY_Year']

###prompt- (previous)

In [None]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
gemini_api_key = "AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0"
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str, max_words: int = 200) -> str:
        try:
            response = self.model.generate_content(prompt)
            words = response.text.strip().split()
            return " ".join(words[:max_words])
        except Exception as e:
            return f"Error generating insight: {e}"

generator = DeepInsightGenerator()

# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
thresholds = [25, 50, 75, 80, 90]

# ONLY TOP 1
top_bms = 1
top_partners = 1
top_n = 1  

threshold_pct_change = 50  

# ORIGINAL PROMPTS
kpi_prompts = {
    'Equity Sales': "Analyze Equity Sales performance.",
    'SIP Sales Achievement': "Analyze SIP Sales Achievement performance.",
    'Net Sales through MARS': "Analyze Net Sales through MARS performance.",
    'Investment Net Sales Achievement': "Analyze Investment Net Sales Achievement."
}

# ================= HELPER FUNCTIONS =================
def partner_concentration(df, kpi):
    df_sorted = df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100 if total_kpi != 0 else 0
    total_partners = len(df_sorted)
    conc_summary = {}
    for t in thresholds:
        if not df_sorted.empty:
            num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
            conc_summary[f'Partners for {t}%'] = num_partners
            conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
    return conc_summary, total_partners, df_sorted

def generate_partner_concentration_insight(df, kpi):
    conc_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        conc_summary, total_partners, df_sorted = partner_concentration(bm_df, kpi)
        text = f"Branch Manager: {bm}\nKPI: {kpi}\nTotal Partners: {total_partners}\n"
        for t in thresholds:
            text += f"- Top {conc_summary.get(f'Partners for {t}%', 0)} partners ({conc_summary.get(f'% of Total Partners for {t}%',0)}%) contribute {t}% of total {kpi}.\n"
        topX = df_sorted.head(top_partners)[['Partner Name_x', kpi, 'Cumulative KPI %']]
        text += "\nTop Partner:\n"
        for _, row in topX.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Generate insights ONLY from the data provided — do not give generic advices.
Write insights in this exact structure:
1. Concentration level 
2. Risk/impact 
3. Action recommendation Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        conc_texts[bm] = text + "\nLLM Insight:\n" + insight
    return conc_texts

def generate_leaders_laggers_insight(df, kpi):
    ll_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        perf = bm_df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
        total_perf = perf[kpi].sum() if perf[kpi].sum()!=0 else 1
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi]/total_perf*100,2)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi]/total_perf*100,2)
        text = f"Branch Manager: {bm}\nKPI: {kpi}\nTop Leader:\n"
        for _, row in leaders.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        text += "\nBottom Lagger:\n"
        for _, row in laggers.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Give insights:
1. Concentration level 
2. Risk/impact 
3. Action recommendation
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        ll_texts[bm] = text + "\nLLM Insight:\n" + insight
    return ll_texts

def detect_drastic_changes(df):
    change_records = []
    df_sorted = df.sort_values(['BM', 'Partner Name_x', 'FY_Year'])
    for (bm, partner), group in df_sorted.groupby(['BM','Partner Name_x']):
        group = group.sort_values('FY_Year')
        for i in range(1,len(group)):
            prev, curr = group.iloc[i-1], group.iloc[i]
            for kpi in kpi_list:
                prev_val, curr_val = prev[kpi], curr[kpi]
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val==0: continue
                pct_change = (curr_val-prev_val)/abs(prev_val)*100
                if abs(pct_change)>=threshold_pct_change:
                    change_records.append({
                        'BM': bm, 'Partner Name_x': partner, 'KPI': kpi,
                        'Year From': prev['FY_Year'], 'Year To': curr['FY_Year'],
                        'Previous Value': round(prev_val,2), 'Current Value': round(curr_val,2),
                        '% Change': round(pct_change,2)
                    })
    return pd.DataFrame(change_records)

def generate_drastic_change_insight(df):
    change_df = detect_drastic_changes(df)
    if change_df.empty:
        return {"All":"No significant changes detected."}
    bm_insights = {}
    for bm, group in change_df.groupby('BM'):
        combined = f"Branch Manager: {bm}\n"
        top_partner = (
            group.groupby('Partner Name_x')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        for partner in top_partner:
            partner_data = group[group['Partner Name_x']==partner]
            prompt = f"""
Data:
{partner_data}
Explain:
1. Largest KPI change
2. Reason & risk
3. BM action
Limit to 200 words.
"""
            insight = generator.generate_insight(prompt)
            combined += f"\nPartner: {partner}\n{insight}\n" + ("-"*80) + "\n"
        bm_insights[bm] = combined
    return bm_insights

def generate_focus_area_insight(df):
    focus_records = []
    for (bm, partner), group in df.groupby(['BM','Partner Name_x']):
        partner_row = group.iloc[-1]
        gaps = {}
        for kpi in kpi_list:
            target_cols = [col for col in df.columns if 'Target' in col and kpi.split()[0] in col]
            target_val = partner_row[target_cols[0]] if target_cols else np.nan
            gap_pct = ((target_val - partner_row[kpi])/target_val*100 if target_val not in [0, None, np.nan] else 0)
            gaps[kpi] = round(gap_pct,2)
        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x:x[1], reverse=True)
        focus_records.append({'BM':bm,'Partner Name_x':partner,'Focus KPIs':sorted_gaps,'Max Gap':max_gap})
    focus_df = pd.DataFrame(focus_records)
    top_bm = focus_df.groupby('BM')['Max Gap'].max().sort_values(ascending=False).head(top_bms).index
    filtered = focus_df[focus_df['BM'].isin(top_bm)]
    top_data = (
        filtered.groupby('BM',group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    bm_insights = {}
    for bm, group in top_data.groupby('BM'):
        bm_insights[bm] = {}
        for _, row in group.iterrows():
            partner = row['Partner Name_x']
            focus_kpis = row['Focus KPIs']
            prompt = f"""
Data:
{focus_kpis}
Explain:
1. KPIs to fix
2. Improvement required
3. BM action plan
Limit to 200 words.
"""
            insight = generator.generate_insight(prompt)
            bm_insights[bm][partner] = insight
    return bm_insights


# ========== MAIN EXECUTION (TOP 1 ONLY) ==========
final_insights = {}

# TOP 1 ZM
top_zms = final_df.groupby('ZM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index

for zm in top_zms:
    zm_group = final_df[final_df['ZM'] == zm]
    final_insights[zm] = {}

    # TOP 1 BM under this ZM
    top_bm = zm_group.groupby('BM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index
    bm_df = zm_group[zm_group['BM'].isin(top_bm)]

    final_insights[zm]['ZM_Overview'] = {}
    for kpi in kpi_list:
        final_insights[zm]['ZM_Overview'][kpi] = {
            'Partner Concentration': generate_partner_concentration_insight(bm_df, kpi),
            'Leaders & Laggers': generate_leaders_laggers_insight(bm_df, kpi),
            'Drastic Change': generate_drastic_change_insight(bm_df),
            'Areas to Focus': generate_focus_area_insight(bm_df)
        }

    # BM LEVEL
    for bm in top_bm:
        bm_group = zm_group[zm_group['BM'] == bm]
        final_insights[zm][bm] = {}

        for kpi in kpi_list:
            final_insights[zm][bm][kpi] = {
                'Partner Concentration': generate_partner_concentration_insight(bm_group, kpi),
                'Leaders & Laggers': generate_leaders_laggers_insight(bm_group, kpi),
                'Drastic Change': generate_drastic_change_insight(bm_group),
                'Areas to Focus': generate_focus_area_insight(bm_group)
            }

        # TOP 1 RH
        top_rh = (
            bm_group.groupby('Relationship Handler')[kpi_list]
            .sum().sum(axis=1)
            .sort_values(ascending=False)
            .head(1).index
        )

        for rh in top_rh:
            rh_group = bm_group[bm_group['Relationship Handler'] == rh]
            final_insights[zm][bm][rh] = {}
            for kpi in kpi_list:
                final_insights[zm][bm][rh][kpi] = {
                    'Partner Concentration': generate_partner_concentration_insight(rh_group, kpi),
                    'Leaders & Laggers': generate_leaders_laggers_insight(rh_group, kpi),
                    'Drastic Change': generate_drastic_change_insight(rh_group),
                    'Areas to Focus': generate_focus_area_insight(rh_group)
                }


In [120]:
import json

print(json.dumps(final_insights, indent=4))
def pretty_print_insights(insights, level=0):
    indent = "    " * level
    for key, value in insights.items():
        if isinstance(value, dict):
            print(f"\n{indent}{key}:")
            pretty_print_insights(value, level + 1)
        else:
            print(f"{indent}- {key}: {value}")  

pretty_print_insights(final_insights)


{
    "SARFARAZ ABDULLA PATEL": {
        "ZM_Overview": {
            "Equity Sales": {
                "Partner Concentration": {
                    "AJINKYA BHIMRAO GURAV": "Branch Manager: AJINKYA BHIMRAO GURAV\nKPI: Equity Sales\nTotal Partners: 396\n- Top 5 partners (1.26%) contribute 25% of total Equity Sales.\n- Top 13 partners (3.28%) contribute 50% of total Equity Sales.\n- Top 38 partners (9.6%) contribute 75% of total Equity Sales.\n- Top 47 partners (11.87%) contribute 80% of total Equity Sales.\n- Top 76 partners (19.19%) contribute 90% of total Equity Sales.\n\nTop Partner:\n- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)\n\nLLM Insight:\nEquity Sales performance under Branch Manager Ajinkya Bhimrao Gurav reveals a high concentration of sales among a small percentage of partners. **1. Concentration Level:** A significant portion of Equity Sales is driven by a small group of partners. The top 19.19% of partners generate 90% of total sales, highlighting a skewed distribution.

In [121]:
pretty_print_insights(final_insights)


SARFARAZ ABDULLA PATEL:

    ZM_Overview:

        Equity Sales:

            Partner Concentration:
                - AJINKYA BHIMRAO GURAV: Branch Manager: AJINKYA BHIMRAO GURAV
KPI: Equity Sales
Total Partners: 396
- Top 5 partners (1.26%) contribute 25% of total Equity Sales.
- Top 13 partners (3.28%) contribute 50% of total Equity Sales.
- Top 38 partners (9.6%) contribute 75% of total Equity Sales.
- Top 47 partners (11.87%) contribute 80% of total Equity Sales.
- Top 76 partners (19.19%) contribute 90% of total Equity Sales.

Top Partner:
- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)

LLM Insight:
Equity Sales performance under Branch Manager Ajinkya Bhimrao Gurav reveals a high concentration of sales among a small percentage of partners. **1. Concentration Level:** A significant portion of Equity Sales is driven by a small group of partners. The top 19.19% of partners generate 90% of total sales, highlighting a skewed distribution. Arvind S Latkar alone accounts for 8.29% of all

In [20]:
perf = bm_df.groupby('Partner Name_x')['Equity Sales'].sum().sort_values(ascending=False)

In [21]:
perf.head()

Partner Name_x
ARVIND S LATKAR                   1103469217.74
VINODKUMAR K UPADHYAY             1070425907.79
CHAITANYA RAMESH SHIPURKAR         561016368.64
PRAVIN B KHATAVKAR                 528607474.98
GIRISHA WEALTH PRIVATE LIMITED     495000818.64
Name: Equity Sales, dtype: object

In [22]:
perf.tail()

Partner Name_x
ARTHAVRUDDHI WEALTH         -20286197.02
Manojkumar Anna Alamane      -47015634.2
VISHWASRAO B PATIL          -64212000.47
Anil Babgonda Patil         -75666287.12
Milind Dharamgounda Patil   -299061651.3
Name: Equity Sales, dtype: object

##### New Prompt Approach-2

In [26]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
gemini_api_key = "AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0"
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str, max_words: int = 200) -> str:
        try:
            response = self.model.generate_content(prompt)
            words = response.text.strip().split()
            return " ".join(words[:max_words])
        except Exception as e:
            return f"Error generating insight: {e}"

generator = DeepInsightGenerator()

# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
thresholds = [25, 50, 75, 80, 90]

top_bms = 1
top_partners = 1
top_n = 1  
threshold_pct_change = 50  

kpi_prompts = {
    'Equity Sales': "Analyze Equity Sales performance.",
    'SIP Sales Achievement': "Analyze SIP Sales Achievement performance.",
    'Net Sales through MARS': "Analyze Net Sales through MARS performance.",
    'Investment Net Sales Achievement': "Analyze Investment Net Sales Achievement."
}

# ================= HELPER FUNCTIONS =================
def partner_concentration(df, kpi):
    df_sorted = df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100 if total_kpi != 0 else 0
    total_partners = len(df_sorted)
    conc_summary = {}
    for t in thresholds:
        if not df_sorted.empty:
            num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
            conc_summary[f'Partners for {t}%'] = num_partners
            conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
    return conc_summary, total_partners, df_sorted

def generate_partner_concentration_insight(df, kpi, level_name=None, level_value=None):
    conc_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        conc_summary, total_partners, df_sorted = partner_concentration(bm_df, kpi)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTotal Partners: {total_partners}\n"
        for t in thresholds:
            text += f"- Top {conc_summary.get(f'Partners for {t}%', 0)} partners ({conc_summary.get(f'% of Total Partners for {t}%',0)}%) contribute {t}% of total {kpi}.\n"
        topX = df_sorted.head(top_partners)[['Partner Name_x', kpi, 'Cumulative KPI %']]
        text += "\nTop Partner:\n"
        for _, row in topX.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Generate insights ONLY from the data provided — No generic lines or introduction, No assumptions beyond the data.
Write insights in this exact structure:
1. Concentration level 
2. Risk/impact 
3. Action recommendation Limit to 200 words
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        conc_texts[bm] = text + "\nLLM Insight:\n" + insight
    return conc_texts

def generate_leaders_laggers_insight(df, kpi, level_name=None, level_value=None):
    ll_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        perf = bm_df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
        total_perf = perf[kpi].sum() if perf[kpi].sum()!=0 else 1
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi]/total_perf*100,2)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi]/total_perf*100,2)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTop Leader:\n"
        for _, row in leaders.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        text += "\nBottom Lagger:\n"
        for _, row in laggers.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Analyze the provided KPI data ONLY. Do NOT give generic advice or introduction, No assumptions beyond the data.
Write insights for {level_name} in this structure:
1. Dependency on leader – identify top partner and quantify impact.
2. Fix for lagger – identify bottom partner and recommend improvement.
3. Impact on {level_name} – explain how top/bottom performers affect overall performance.
Limit output to 150–200 words. Use data values explicitly.
"""
        insight = generator.generate_insight(prompt)
        ll_texts[bm] = text + "\nLLM Insight:\n" + insight
    return ll_texts

def detect_drastic_changes(df):
    change_records = []
    df_sorted = df.sort_values(['BM', 'Partner Name_x', 'FY_Year'])
    for (bm, partner), group in df_sorted.groupby(['BM','Partner Name_x']):
        group = group.sort_values('FY_Year')
        for i in range(1,len(group)):
            prev, curr = group.iloc[i-1], group.iloc[i]
            for kpi in kpi_list:
                prev_val, curr_val = prev[kpi], curr[kpi]
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val==0: continue
                pct_change = (curr_val-prev_val)/abs(prev_val)*100
                if abs(pct_change)>=threshold_pct_change:
                    change_records.append({
                        'BM': bm, 'Partner Name_x': partner, 'KPI': kpi,
                        'Year From': prev['FY_Year'], 'Year To': curr['FY_Year'],
                        'Previous Value': round(prev_val,2), 'Current Value': round(curr_val,2),
                        '% Change': round(pct_change,2)
                    })
    return pd.DataFrame(change_records)

def generate_drastic_change_insight(df, level_name=None, level_value=None):
    change_df = detect_drastic_changes(df)
    if change_df.empty:
        return {level_value if level_value else "All": "No significant changes detected."}
    
    bm_insights = {}
    for bm, group in change_df.groupby('BM'):
        combined = f"{level_name}: {level_value if level_value else bm}\n" if level_name else f"BM: {bm}\n"
        top_partner = (
            group.groupby('Partner Name_x')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        for partner in top_partner:
            partner_data = group[group['Partner Name_x'] == partner]
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{partner_data}
Analyze KPI changes using ONLY the provided data. No generic lines or introduction, No assumptions beyond the data.
Write insights for {level_name} in this structure:
1. Largest KPI Change – identify the KPI, partner, and magnitude of change.
2. Reason & Risk – explain why this change happened based on data patterns (e.g., low base, spike/drop) and its business risk.
3. Action – give MF-sales-specific steps for {level_name} (partner engagement, mid-tier activation, continuity plans). Avoid generic advice.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            combined += f"\nPartner: {partner}\n{insight}\n" + ("-"*80) + "\n"
        bm_insights[bm] = combined
    return bm_insights

def generate_focus_area_insight(df, level_name=None, level_value=None):
    focus_records = []
    for (bm, partner), group in df.groupby(['BM','Partner Name_x']):
        partner_row = group.iloc[-1]
        gaps = {}
        for kpi in kpi_list:
            target_cols = [col for col in df.columns if 'Target' in col and kpi.split()[0] in col]
            target_val = partner_row[target_cols[0]] if target_cols else np.nan
            gap_pct = ((target_val - partner_row[kpi])/target_val*100 if target_val not in [0, None, np.nan] else 0)
            gaps[kpi] = round(gap_pct,2)
        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x:x[1], reverse=True)
        focus_records.append({'BM':bm,'Partner Name_x':partner,'Focus KPIs':sorted_gaps,'Max Gap':max_gap})
    
    focus_df = pd.DataFrame(focus_records)
    top_bm = focus_df.groupby('BM')['Max Gap'].max().sort_values(ascending=False).head(top_bms).index
    filtered = focus_df[focus_df['BM'].isin(top_bm)]
    top_data = (
        filtered.groupby('BM',group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    
    bm_insights = {}
    for bm, group in top_data.groupby('BM'):
        bm_insights[bm] = {}
        for _, row in group.iterrows():
            partner = row['Partner Name_x']
            focus_kpis = row['Focus KPIs']
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{focus_kpis}
Analyze KPIs and gaps using ONLY the provided data. No generic lines or introduction, No assumptions beyond the data.
Write insights for {level_name} in this structure:
1. KPIs to fix – identify top underperforming KPIs per partner.
2. Improvement required – quantify the gap and suggest realistic targets.
3. Action plan – provide specific steps for {level_name} to improve performance.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            bm_insights[bm][partner] = insight
    return bm_insights

# ========== MAIN EXECUTION ==========
final_insights = {}

top_zms = final_df.groupby('ZM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index

for zm in top_zms:
    zm_group = final_df[final_df['ZM'] == zm]
    final_insights[zm] = {}

    # TOP 1 BM under this ZM
    top_bm = zm_group.groupby('BM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index
    bm_df = zm_group[zm_group['BM'].isin(top_bm)]

    final_insights[zm]['ZM_Overview'] = {}
    for kpi in kpi_list:
        final_insights[zm]['ZM_Overview'][kpi] = {
            'Partner Concentration': generate_partner_concentration_insight(bm_df, kpi, level_name="ZM", level_value=zm),
            'Leaders & Laggers': generate_leaders_laggers_insight(bm_df, kpi, level_name="ZM", level_value=zm),
            'Drastic Change': generate_drastic_change_insight(bm_df, level_name="ZM", level_value=zm),
            'Areas to Focus': generate_focus_area_insight(bm_df, level_name="ZM", level_value=zm)
        }

    # BM LEVEL
    for bm in top_bm:
        bm_group = zm_group[zm_group['BM'] == bm]
        final_insights[zm][bm] = {}

        for kpi in kpi_list:
            final_insights[zm][bm][kpi] = {
                'Partner Concentration': generate_partner_concentration_insight(bm_group, kpi, level_name="BM", level_value=bm),
                'Leaders & Laggers': generate_leaders_laggers_insight(bm_group, kpi, level_name="BM", level_value=bm),
                'Drastic Change': generate_drastic_change_insight(bm_group, level_name="BM", level_value=bm),
                'Areas to Focus': generate_focus_area_insight(bm_group, level_name="BM", level_value=bm)
            }

        # TOP 1 RH
        top_rh = (
            bm_group.groupby('Relationship Handler')[kpi_list]
            .sum().sum(axis=1)
            .sort_values(ascending=False)
            .head(1).index
        )

        for rh in top_rh:
            rh_group = bm_group[bm_group['Relationship Handler'] == rh]
            final_insights[zm][bm][rh] = {}
            for kpi in kpi_list:
                final_insights[zm][bm][rh][kpi] = {
                    'Partner Concentration': generate_partner_concentration_insight(rh_group, kpi, level_name="RH", level_value=rh),
                    'Leaders & Laggers': generate_leaders_laggers_insight(rh_group, kpi, level_name="RH", level_value=rh),
                    'Drastic Change': generate_drastic_change_insight(rh_group, level_name="RH", level_value=rh),
                    'Areas to Focus': generate_focus_area_insight(rh_group, level_name="RH", level_value=rh)
                }


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


In [27]:
import json

print(json.dumps(final_insights, indent=4))
def pretty_print_insights(insights, level=0):
    indent = "    " * level
    for key, value in insights.items():
        if isinstance(value, dict):
            print(f"\n{indent}{key}:")
            pretty_print_insights(value, level + 1)
        else:
            print(f"{indent}- {key}: {value}")  

pretty_print_insights(final_insights)


{
    "SARFARAZ ABDULLA PATEL": {
        "ZM_Overview": {
            "Equity Sales": {
                "Partner Concentration": {
                    "AJINKYA BHIMRAO GURAV": "ZM: SARFARAZ ABDULLA PATEL\nKPI: Equity Sales\nTotal Partners: 396\n- Top 5 partners (1.26%) contribute 25% of total Equity Sales.\n- Top 13 partners (3.28%) contribute 50% of total Equity Sales.\n- Top 38 partners (9.6%) contribute 75% of total Equity Sales.\n- Top 47 partners (11.87%) contribute 80% of total Equity Sales.\n- Top 76 partners (19.19%) contribute 90% of total Equity Sales.\n\nTop Partner:\n- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)\n\nLLM Insight:\n1. **Concentration level**: Equity Sales are highly concentrated among a small percentage of partners. The top 1.26% of partners (5 out of 396) generate 25% of total Equity Sales, and the top 19.19% (76 partners) account for 90% of total sales. A single partner, ARVIND S LATKAR, contributes 8.29% of total Equity Sales. 2. **Risk/impact**: Dependence 

In [28]:
pretty_print_insights(final_insights)


SARFARAZ ABDULLA PATEL:

    ZM_Overview:

        Equity Sales:

            Partner Concentration:
                - AJINKYA BHIMRAO GURAV: ZM: SARFARAZ ABDULLA PATEL
KPI: Equity Sales
Total Partners: 396
- Top 5 partners (1.26%) contribute 25% of total Equity Sales.
- Top 13 partners (3.28%) contribute 50% of total Equity Sales.
- Top 38 partners (9.6%) contribute 75% of total Equity Sales.
- Top 47 partners (11.87%) contribute 80% of total Equity Sales.
- Top 76 partners (19.19%) contribute 90% of total Equity Sales.

Top Partner:
- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)

LLM Insight:
1. **Concentration level**: Equity Sales are highly concentrated among a small percentage of partners. The top 1.26% of partners (5 out of 396) generate 25% of total Equity Sales, and the top 19.19% (76 partners) account for 90% of total sales. A single partner, ARVIND S LATKAR, contributes 8.29% of total Equity Sales. 2. **Risk/impact**: Dependence on a small group of top-performing partners pose

###sql

In [102]:
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

def extract_insights(final_insights):
    rows = []

    for zm, zm_block in final_insights.items():                    # Level 1
        for bm, bm_block in zm_block.items():                      # Level 2

            if not isinstance(bm_block, dict):
                continue

            for kpi, kpi_block in bm_block.items():                # Level 3 (kpi_insights)

                if not isinstance(kpi_block, dict):
                    continue

                for insight_type, insight_dict in kpi_block.items():   # Level 4 (Insight_Type)

                    if not isinstance(insight_dict, dict):
                        continue

                    for partner, insight_text in insight_dict.items(): # Partner + Text

                        # Store one clean row
                        rows.append({
                            "Zm": zm,
                            "BM": bm,
                            "Rh": None,
                            "Partner_Name": partner,
                            "kpi_insights": kpi,
                            "Insight_Type": insight_type,
                            "Insights": str(insight_text)
                        })

    return pd.DataFrame(rows)


# ---- EXTRACT THE DATA ----
df = extract_insights(final_insights)

print("Extracted rows:", len(df))
print(df.head(10))

# ---- SAVE TO SQL ----
df.to_sql("insights_table", engine, if_exists="append", index=False)

print("Saved to DB!")


Extracted rows: 48
                       Zm           BM    Rh           Partner_Name  \
0  SARFARAZ ABDULLA PATEL  ZM_Overview  None  AJINKYA BHIMRAO GURAV   
1  SARFARAZ ABDULLA PATEL  ZM_Overview  None  AJINKYA BHIMRAO GURAV   
2  SARFARAZ ABDULLA PATEL  ZM_Overview  None  AJINKYA BHIMRAO GURAV   
3  SARFARAZ ABDULLA PATEL  ZM_Overview  None  AJINKYA BHIMRAO GURAV   
4  SARFARAZ ABDULLA PATEL  ZM_Overview  None  AJINKYA BHIMRAO GURAV   
5  SARFARAZ ABDULLA PATEL  ZM_Overview  None  AJINKYA BHIMRAO GURAV   
6  SARFARAZ ABDULLA PATEL  ZM_Overview  None  AJINKYA BHIMRAO GURAV   
7  SARFARAZ ABDULLA PATEL  ZM_Overview  None  AJINKYA BHIMRAO GURAV   
8  SARFARAZ ABDULLA PATEL  ZM_Overview  None  AJINKYA BHIMRAO GURAV   
9  SARFARAZ ABDULLA PATEL  ZM_Overview  None  AJINKYA BHIMRAO GURAV   

             kpi_insights           Insight_Type  \
0            Equity Sales  Partner Concentration   
1            Equity Sales      Leaders & Laggers   
2            Equity Sales         Drastic C

In [104]:
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")


# ---------------------- SAFE HELPER ----------------------
def safe_dict(x):
    """Convert non-dict (string, None, list) into a dict so .items() never fails."""
    if isinstance(x, dict):
        return x
    if x is None:
        return {"ALL": ""}
    return {"ALL": str(x)}


# ---------------------- MAIN EXTRACTION ----------------------
def extract_insights(final_insights):
    rows = []

    for zm, zm_block in final_insights.items():  # ZM level
        for key, value in zm_block.items():

            # -------------------------------------------------
            # CASE 1: ZM LEVEL (key = "ZM_Overview")
            # -------------------------------------------------
            if key == "ZM_Overview":
                bm = None
                rh = None

                for kpi, kpi_block in value.items():
                    for insight_type, insight_dict in kpi_block.items():

                        insight_dict = safe_dict(insight_dict)

                        for partner, insight_text in insight_dict.items():
                            rows.append({
                                "Zm": zm,
                                "BM": bm,
                                "Rh": rh,
                                "Partner_Name": partner,
                                "kpi_insights": kpi,
                                "Insight_Type": insight_type,
                                "Insights": str(insight_text)
                            })
                continue

            # -------------------------------------------------
            # CASE 2: BM LEVEL
            # -------------------------------------------------
            bm = key
            rh = None

            if not isinstance(value, dict):
                continue

            for kpi, kpi_block in value.items():

                # -------------------------------------------------
                # CASE 3: RH LEVEL  (if kpi_block contains RH dicts)
                # -------------------------------------------------
                if isinstance(kpi_block, dict) and all(isinstance(v, dict) for v in kpi_block.values()):
                    for rh_name, rh_block in kpi_block.items():
                        rh = rh_name

                        for insight_type, insight_dict in rh_block.items():

                            insight_dict = safe_dict(insight_dict)

                            for partner, insight_text in insight_dict.items():
                                rows.append({
                                    "Zm": zm,
                                    "BM": bm,
                                    "Rh": rh,
                                    "Partner_Name": partner,
                                    "kpi_insights": kpi,
                                    "Insight_Type": insight_type,
                                    "Insights": str(insight_text)
                                })

                # -------------------------------------------------
                # CASE 4: BM LEVEL INSIGHTS (NO RH)
                # -------------------------------------------------
                else:
                    for insight_type, insight_dict in kpi_block.items():

                        insight_dict = safe_dict(insight_dict)

                        for partner, insight_text in insight_dict.items():
                            rows.append({
                                "Zm": zm,
                                "BM": bm,
                                "Rh": None,
                                "Partner_Name": partner,
                                "kpi_insights": kpi,
                                "Insight_Type": insight_type,
                                "Insights": str(insight_text)
                            })

    return pd.DataFrame(rows)


# ---------------------- EXTRACT + SAVE ----------------------
df = extract_insights(final_insights)
print(df.head(20))

df.to_sql("insights_table", engine, if_exists="append", index=False)
print("Saved!")


                        Zm                     BM                     Rh  \
0   SARFARAZ ABDULLA PATEL                   None                   None   
1   SARFARAZ ABDULLA PATEL                   None                   None   
2   SARFARAZ ABDULLA PATEL                   None                   None   
3   SARFARAZ ABDULLA PATEL                   None                   None   
4   SARFARAZ ABDULLA PATEL                   None                   None   
5   SARFARAZ ABDULLA PATEL                   None                   None   
6   SARFARAZ ABDULLA PATEL                   None                   None   
7   SARFARAZ ABDULLA PATEL                   None                   None   
8   SARFARAZ ABDULLA PATEL                   None                   None   
9   SARFARAZ ABDULLA PATEL                   None                   None   
10  SARFARAZ ABDULLA PATEL                   None                   None   
11  SARFARAZ ABDULLA PATEL                   None                   None   
12  SARFARAZ

In [101]:
import json

def print_structure(data):
    print(json.dumps(data, indent=2, ensure_ascii=False))

print_structure(final_insights)


{
  "SARFARAZ ABDULLA PATEL": {
    "ZM_Overview": {
      "Equity Sales": {
        "Partner Concentration": {
          "AJINKYA BHIMRAO GURAV": "ZM: SARFARAZ ABDULLA PATEL\nKPI: Equity Sales\nTotal Partners: 396\n- Top 5 partners (1.26%) contribute 25% of total Equity Sales.\n- Top 13 partners (3.28%) contribute 50% of total Equity Sales.\n- Top 38 partners (9.6%) contribute 75% of total Equity Sales.\n- Top 47 partners (11.87%) contribute 80% of total Equity Sales.\n- Top 76 partners (19.19%) contribute 90% of total Equity Sales.\n\nTop Partner:\n- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)\n\nLLM Insight:\nHere's an analysis of ZM's Equity Sales performance based on the provided data: 1. **Concentration level**: A significant portion of Equity Sales is concentrated among a small percentage of partners. The top 5 partners contribute 25%, and the top 13 partners contribute 50% of total Equity Sales. ARVIND S LATKAR alone generates 8.29% of total sales. 2. **Risk/impact**: High relian

In [105]:
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")


# ---------------------- SAFE HELPER ----------------------
def safe_dict(x):
    """Convert non-dict (string, None, list) into a dict so .items() never fails."""
    if isinstance(x, dict):
        return x
    if x is None:
        return {"ALL": ""}
    return {"ALL": str(x)}


# ---------------------- MAIN EXTRACTION ----------------------
def extract_insights(final_insights):
    rows = []

    for zm, zm_block in final_insights.items():

        for key, value in zm_block.items():

            # ---------------------------------------------------------
            # CASE 1: ZM OVERVIEW (key == "ZM_Overview")
            # ---------------------------------------------------------
            if key == "ZM_Overview":
                for kpi, kpi_block in value.items():
                    for insight_type, insight_dict in kpi_block.items():

                        insight_dict = safe_dict(insight_dict)

                        for partner, insight_text in insight_dict.items():
                            rows.append({
                                "Zm": zm,
                                "BM": None,       # ZM level
                                "Rh": None,       # No RH
                                "Partner_Name": partner,
                                "kpi_insights": kpi,
                                "Insight_Type": insight_type,
                                "Insights": str(insight_text)
                            })
                continue

            # ---------------------------------------------------------
            # CASE 2: BM LEVEL
            # ---------------------------------------------------------
            bm = key  # This key is BM NAME
            if not isinstance(value, dict):
                continue

            for kpi, kpi_block in value.items():

                # ---------------------------------------------------------
                # CASE 3: RH LEVEL (if values inside kpi are RH dicts)
                # ---------------------------------------------------------
                if isinstance(kpi_block, dict) and all(isinstance(v, dict) for v in kpi_block.values()):

                    for rh_name, rh_block in kpi_block.items():   # rh_name = RH

                        for insight_type, insight_dict in rh_block.items():

                            insight_dict = safe_dict(insight_dict)

                            for partner, insight_text in insight_dict.items():
                                rows.append({
                                    "Zm": zm,
                                    "BM": bm,       # BM name preserved
                                    "Rh": rh_name,  # RH name added
                                    "Partner_Name": partner,
                                    "kpi_insights": kpi,
                                    "Insight_Type": insight_type,
                                    "Insights": str(insight_text)
                                })

                # ---------------------------------------------------------
                # CASE 4: BM LEVEL INSIGHTS (NO RH)
                # ---------------------------------------------------------
                else:
                    for insight_type, insight_dict in kpi_block.items():

                        insight_dict = safe_dict(insight_dict)

                        for partner, insight_text in insight_dict.items():
                            rows.append({
                                "Zm": zm,
                                "BM": bm,   # BM name
                                "Rh": None, # No RH
                                "Partner_Name": partner,
                                "kpi_insights": kpi,
                                "Insight_Type": insight_type,
                                "Insights": str(insight_text)
                            })

    return pd.DataFrame(rows)


# ---------------------- RUN & SSAVE ----------------------
df = extract_insights(final_insights)
print(df.head(20))

df.to_sql("insights_table", engine, if_exists="append", index=False)
print("Saved!")


                        Zm                     BM                     Rh  \
0   SARFARAZ ABDULLA PATEL                   None                   None   
1   SARFARAZ ABDULLA PATEL                   None                   None   
2   SARFARAZ ABDULLA PATEL                   None                   None   
3   SARFARAZ ABDULLA PATEL                   None                   None   
4   SARFARAZ ABDULLA PATEL                   None                   None   
5   SARFARAZ ABDULLA PATEL                   None                   None   
6   SARFARAZ ABDULLA PATEL                   None                   None   
7   SARFARAZ ABDULLA PATEL                   None                   None   
8   SARFARAZ ABDULLA PATEL                   None                   None   
9   SARFARAZ ABDULLA PATEL                   None                   None   
10  SARFARAZ ABDULLA PATEL                   None                   None   
11  SARFARAZ ABDULLA PATEL                   None                   None   
12  SARFARAZ

In [45]:
list(final_df.columns)


['Sr No.',
 'Partner Code',
 'Partner Name_x',
 'Center_x',
 'Category_x',
 'Relationship Handler',
 'Investment Net Sales Target',
 'Investment Net Sales Achievement',
 'Investment Net Sales % Achievement',
 'Equity Sales',
 'MIP Sales',
 'Gold Sales',
 'Sales in Physical Assets ',
 'Sales in Direct Equity ',
 'FD + Bond (Primary Market) Sales',
 'Secondary Market Bond Sales',
 'Net Sales Through Realty',
 'Net NJ PMS Sales',
 'Net Non-NJ PMS Sales',
 'Net Sales through MARS',
 'SIP Sales Target',
 'SIP Sales Achievement',
 'SIP Sales % Achievement',
 'Fresh Gross SIP Sales',
 'SIP Closure / Termination',
 'FY_Year',
 'Sr No',
 'Broker Code',
 'Doer Name_x',
 'Doer Type_x',
 'Equity Net Sales_Target',
 'Equity Net Sales_Achievement',
 'Equity Net Sales_% Achievement',
 'Insurance_Target',
 'Insurance_Achievement',
 'Insurance_% Achievement',
 'SIP Sales_Target',
 'SIP Sales_Achievement',
 'SIP Sales_% Achievement',
 'Client Acquisition_Target',
 'Client Acquisition_Achievement',
 'Cli

In [30]:
final_df.head(5)

Unnamed: 0,Sr No.,Partner Code,Partner Name_x,Center_x,Category_x,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name_x,Doer Type_x,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement_x,ZM,SRM,RM,BM,Partner Name_y,Category_y,Doer Name_y,Doer Type_y,Center_y,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement_y,Status,Total AUM FY 24-25 Q4 YTD,Equity AUM FY 24-25 Q4 YTD,LIVE SIP FY 24-25 Q4 YTD,Total Net Sales\nFY 24-25 Q4 YTD,Equity Net Sales\nFY 24-25 Q4 YTD,Net SIP\nFY 24-25 Q4 YTD,MARS AUM FY 24-25 Q4 YTD,MARS Net Sales FY 24-25 Q4 YTD,PMS AUM FY 24-25 Q4 YTD,Net Sales FY 24-25 Q4 YTD,Clients Acquired FY 24-25 Q4 YTD,Live Accounts (Non-D) FY 24-25 Q4 YTD,Saturday School (YTD) FY 24-25 Q4 YTD,Investment\nSaturday School (YTD) FY 24-25 Q4 YTD,Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD,Total Group FY 24-25 Q4 YTD,Group Covered FY 24-25 Q4 YTD,Total Reviews\nFY 24-25 Q4 YTD,Non-NJ AUM\nFY 24-25 Q4 YTD,Amount FY 24-25 Q4 YTD,Flexicap Target\nFY 24-25 Q4 YTD,Flexicap Ach\nFY 24-25 Q4 YTD,AMC NS Target\nFY 24-25 Q4 YTD,AMC NS Ach\nFY 24-25 Q4 YTD,Target Qty (FY 24-25 Q4 YTD)\n(in Cr),Order Qty\n(FY 24-25 Q4 YTD)\n(in Cr),MARS TO EQ AUM
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023,3,24695,SUBRATA MAITY,Fundz Express,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,4,24040,GOURAB PURKAIT,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,26998.7,1.8,26998.7,0,0,0,0,0,0,0,0,0,0,33750,10499.49,31.11,10499.49,0.0,2023,4,24040,SUBRATA MAITY,Fundz Express,1500000,26998.7,1.8,,,,33750,10499.49,31.11,20,3.0,15.0,3.11,0,0.0,6999.66,2.33,14.59,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,5,25776,RAJ KISHORE BARIK,24 SOUTH PARGANA,NON D,SUBRATA MAITY,600000,496499.92,82.75,1499.92,0,0,0,0,0,0,0,0,0,495000,13500,1499.92,11.11,1499.92,0.0,2023,5,25776,SUBRATA MAITY,Fundz Express,600000,496499.92,82.75,,,,13500,1499.92,11.11,8,3.0,37.5,0.0,0,,0.0,0.0,40.82,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [23]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
gemini_api_key = "AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0"
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str, max_words: int = 200) -> str:
        try:
            response = self.model.generate_content(prompt)
            words = response.text.strip().split()
            return " ".join(words[:max_words])
        except Exception as e:
            return f"Error generating insight: {e}"

generator = DeepInsightGenerator()

# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
thresholds = [25, 50, 75, 80, 90]

top_bms = 1
top_partners = 1
top_n = 1  
threshold_pct_change = 50  

kpi_prompts = {
    'Equity Sales': "Analyze Equity Sales performance.",
    'SIP Sales Achievement': "Analyze SIP Sales Achievement performance.",
    'Net Sales through MARS': "Analyze Net Sales through MARS performance.",
    'Investment Net Sales Achievement': "Analyze Investment Net Sales Achievement."
}

# ================= HELPER FUNCTIONS =================
def partner_concentration(df, kpi):
    df_sorted = df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100 if total_kpi != 0 else 0
    total_partners = len(df_sorted)
    conc_summary = {}
    for t in thresholds:
        if not df_sorted.empty:
            num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
            conc_summary[f'Partners for {t}%'] = num_partners
            conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
    return conc_summary, total_partners, df_sorted

def generate_partner_concentration_insight(df, kpi, level_name=None, level_value=None):
    conc_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        conc_summary, total_partners, df_sorted = partner_concentration(bm_df, kpi)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTotal Partners: {total_partners}\n"
        for t in thresholds:
            text += f"- Top {conc_summary.get(f'Partners for {t}%', 0)} partners ({conc_summary.get(f'% of Total Partners for {t}%',0)}%) contribute {t}% of total {kpi}.\n"
        topX = df_sorted.head(top_partners)[['Partner Name_x', kpi, 'Cumulative KPI %']]
        text += "\nTop Partner:\n"
        for _, row in topX.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Generate insights ONLY from the data provided — do not give generic advices.
Write insights in this exact structure:
1. Concentration level 
2. Risk/impact 
3. Action recommendation Limit to 200 words
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        conc_texts[bm] = text + "\nLLM Insight:\n" + insight
    return conc_texts

def generate_leaders_laggers_insight(df, kpi, level_name=None, level_value=None):
    ll_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        perf = bm_df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
        total_perf = perf[kpi].sum() if perf[kpi].sum()!=0 else 1
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi]/total_perf*100,2)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi]/total_perf*100,2)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTop Leader:\n"
        for _, row in leaders.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        text += "\nBottom Lagger:\n"
        for _, row in laggers.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Analyze the provided KPI data ONLY. Do NOT give generic advice.
Write insights for {level_name} in this structure:
1. Dependency on leader – identify top partner and quantify impact.
2. Fix for lagger – identify bottom partner and recommend improvement.
3. Impact on {level_name} – explain how top/bottom performers affect overall performance.
Limit output to 150–200 words. Use data values explicitly.
"""
        insight = generator.generate_insight(prompt)
        ll_texts[bm] = text + "\nLLM Insight:\n" + insight
    return ll_texts

def detect_drastic_changes(df):
    change_records = []
    df_sorted = df.sort_values(['BM', 'Partner Name_x', 'FY_Year'])
    for (bm, partner), group in df_sorted.groupby(['BM','Partner Name_x']):
        group = group.sort_values('FY_Year')
        for i in range(1,len(group)):
            prev, curr = group.iloc[i-1], group.iloc[i]
            for kpi in kpi_list:
                prev_val, curr_val = prev[kpi], curr[kpi]
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val==0: continue
                pct_change = (curr_val-prev_val)/abs(prev_val)*100
                if abs(pct_change)>=threshold_pct_change:
                    change_records.append({
                        'BM': bm, 'Partner Name_x': partner, 'KPI': kpi,
                        'Year From': prev['FY_Year'], 'Year To': curr['FY_Year'],
                        'Previous Value': round(prev_val,2), 'Current Value': round(curr_val,2),
                        '% Change': round(pct_change,2)
                    })
    return pd.DataFrame(change_records)

def generate_drastic_change_insight(df, level_name=None, level_value=None):
    change_df = detect_drastic_changes(df)
    if change_df.empty:
        return {level_value if level_value else "All": "No significant changes detected."}
    
    bm_insights = {}
    for bm, group in change_df.groupby('BM'):
        combined = f"{level_name}: {level_value if level_value else bm}\n" if level_name else f"BM: {bm}\n"
        top_partner = (
            group.groupby('Partner Name_x')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        for partner in top_partner:
            partner_data = group[group['Partner Name_x'] == partner]
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{partner_data}
Analyze KPI changes using ONLY the provided data.
Write insights for {level_name} in this structure:
1. Largest KPI Change – identify the KPI, partner, and magnitude of change.
2. Reason & Risk – explain why this change happened based on data patterns (e.g., low base, spike/drop) and its business risk.
3. Action – give MF-sales-specific steps for {level_name} (partner engagement, mid-tier activation, continuity plans). Avoid generic advice.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            combined += f"\nPartner: {partner}\n{insight}\n" + ("-"*80) + "\n"
        bm_insights[bm] = combined
    return bm_insights

def generate_focus_area_insight(df, level_name=None, level_value=None):
    focus_records = []
    for (bm, partner), group in df.groupby(['BM','Partner Name_x']):
        partner_row = group.iloc[-1]
        gaps = {}
        for kpi in kpi_list:
            target_cols = [col for col in df.columns if 'Target' in col and kpi.split()[0] in col]
            target_val = partner_row[target_cols[0]] if target_cols else np.nan
            gap_pct = ((target_val - partner_row[kpi])/target_val*100 if target_val not in [0, None, np.nan] else 0)
            gaps[kpi] = round(gap_pct,2)
        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x:x[1], reverse=True)
        focus_records.append({'BM':bm,'Partner Name_x':partner,'Focus KPIs':sorted_gaps,'Max Gap':max_gap})
    
    focus_df = pd.DataFrame(focus_records)
    top_bm = focus_df.groupby('BM')['Max Gap'].max().sort_values(ascending=False).head(top_bms).index
    filtered = focus_df[focus_df['BM'].isin(top_bm)]
    top_data = (
        filtered.groupby('BM',group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    
    bm_insights = {}
    for bm, group in top_data.groupby('BM'):
        bm_insights[bm] = {}
        for _, row in group.iterrows():
            partner = row['Partner Name_x']
            focus_kpis = row['Focus KPIs']
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{focus_kpis}
Analyze KPIs and gaps using ONLY the provided data.
Write insights for {level_name} in this structure:
1. KPIs to fix – identify top underperforming KPIs per partner.
2. Improvement required – quantify the gap and suggest realistic targets.
3. Action plan – provide specific steps for {level_name} to improve performance.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            bm_insights[bm][partner] = insight
    return bm_insights

# ========== MAIN EXECUTION ==========
final_insights = {}

top_zms = final_df.groupby('ZM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index

for zm in top_zms:
    zm_group = final_df[final_df['ZM'] == zm]
    final_insights[zm] = {}

    # TOP 1 BM under this ZM
    top_bm = zm_group.groupby('BM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index
    bm_df = zm_group[zm_group['BM'].isin(top_bm)]

    final_insights[zm]['ZM_Overview'] = {}
    for kpi in kpi_list:
        final_insights[zm]['ZM_Overview'][kpi] = {
            'Partner Concentration': generate_partner_concentration_insight(bm_df, kpi, level_name="ZM", level_value=zm),
            'Leaders & Laggers': generate_leaders_laggers_insight(bm_df, kpi, level_name="ZM", level_value=zm),
            'Drastic Change': generate_drastic_change_insight(bm_df, level_name="ZM", level_value=zm),
            'Areas to Focus': generate_focus_area_insight(bm_df, level_name="ZM", level_value=zm)
        }

    # BM LEVEL
    for bm in top_bm:
        bm_group = zm_group[zm_group['BM'] == bm]
        final_insights[zm][bm] = {}

        for kpi in kpi_list:
            final_insights[zm][bm][kpi] = {
                'Partner Concentration': generate_partner_concentration_insight(bm_group, kpi, level_name="BM", level_value=bm),
                'Leaders & Laggers': generate_leaders_laggers_insight(bm_group, kpi, level_name="BM", level_value=bm),
                'Drastic Change': generate_drastic_change_insight(bm_group, level_name="BM", level_value=bm),
                'Areas to Focus': generate_focus_area_insight(bm_group, level_name="BM", level_value=bm)
            }

        # TOP 1 RH
        top_rh = (
            bm_group.groupby('Relationship Handler')[kpi_list]
            .sum().sum(axis=1)
            .sort_values(ascending=False)
            .head(1).index
        )

        for rh in top_rh:
            rh_group = bm_group[bm_group['Relationship Handler'] == rh]
            final_insights[zm][bm][rh] = {}
            for kpi in kpi_list:
                final_insights[zm][bm][rh][kpi] = {
                    'Partner Concentration': generate_partner_concentration_insight(rh_group, kpi, level_name="RH", level_value=rh),
                    'Leaders & Laggers': generate_leaders_laggers_insight(rh_group, kpi, level_name="RH", level_value=rh),
                    'Drastic Change': generate_drastic_change_insight(rh_group, level_name="RH", level_value=rh),
                    'Areas to Focus': generate_focus_area_insight(rh_group, level_name="RH", level_value=rh)
                }


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


In [24]:
import json

print(json.dumps(final_insights, indent=4))
def pretty_print_insights(insights, level=0):
    indent = "    " * level
    for key, value in insights.items():
        if isinstance(value, dict):
            print(f"\n{indent}{key}:")
            pretty_print_insights(value, level + 1)
        else:
            print(f"{indent}- {key}: {value}")  

pretty_print_insights(final_insights)


{
    "SARFARAZ ABDULLA PATEL": {
        "ZM_Overview": {
            "Equity Sales": {
                "Partner Concentration": {
                    "AJINKYA BHIMRAO GURAV": "ZM: SARFARAZ ABDULLA PATEL\nKPI: Equity Sales\nTotal Partners: 396\n- Top 5 partners (1.26%) contribute 25% of total Equity Sales.\n- Top 13 partners (3.28%) contribute 50% of total Equity Sales.\n- Top 38 partners (9.6%) contribute 75% of total Equity Sales.\n- Top 47 partners (11.87%) contribute 80% of total Equity Sales.\n- Top 76 partners (19.19%) contribute 90% of total Equity Sales.\n\nTop Partner:\n- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)\n\nLLM Insight:\n**Analysis of ZM: SARFARAZ ABDULLA PATEL's Equity Sales Performance** 1. **Concentration Level:** Equity Sales are heavily concentrated among a small percentage of partners. The top 1.26% of partners drive 25% of sales, and 19.19% account for 90% of total sales. ARVIND S LATKAR alone contributes 8.29% of total sales. 2. **Risk/Impact:** Significant r

In [25]:
pretty_print_insights(final_insights)


SARFARAZ ABDULLA PATEL:

    ZM_Overview:

        Equity Sales:

            Partner Concentration:
                - AJINKYA BHIMRAO GURAV: ZM: SARFARAZ ABDULLA PATEL
KPI: Equity Sales
Total Partners: 396
- Top 5 partners (1.26%) contribute 25% of total Equity Sales.
- Top 13 partners (3.28%) contribute 50% of total Equity Sales.
- Top 38 partners (9.6%) contribute 75% of total Equity Sales.
- Top 47 partners (11.87%) contribute 80% of total Equity Sales.
- Top 76 partners (19.19%) contribute 90% of total Equity Sales.

Top Partner:
- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)

LLM Insight:
**Analysis of ZM: SARFARAZ ABDULLA PATEL's Equity Sales Performance** 1. **Concentration Level:** Equity Sales are heavily concentrated among a small percentage of partners. The top 1.26% of partners drive 25% of sales, and 19.19% account for 90% of total sales. ARVIND S LATKAR alone contributes 8.29% of total sales. 2. **Risk/Impact:** Significant reliance on a small group makes the portfolio vul

In [56]:
pretty_print_insights(final_insights)


SARFARAZ ABDULLA PATEL:

    ZM_Overview:

        Equity Sales:

            Partner Concentration:
                - AJINKYA BHIMRAO GURAV: ZM: SARFARAZ ABDULLA PATEL
KPI: Equity Sales
Total Partners: 396
- Top 5 partners (1.26%) contribute 25% of total Equity Sales.
- Top 13 partners (3.28%) contribute 50% of total Equity Sales.
- Top 38 partners (9.6%) contribute 75% of total Equity Sales.
- Top 47 partners (11.87%) contribute 80% of total Equity Sales.
- Top 76 partners (19.19%) contribute 90% of total Equity Sales.

Top Partner:
- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)

LLM Insight:
Here's an analysis of ZM's Equity Sales performance based on the provided data: 1. **Concentration level**: A significant portion of Equity Sales is concentrated among a small percentage of partners. The top 5 partners contribute 25%, and the top 13 partners contribute 50% of total Equity Sales. ARVIND S LATKAR alone generates 8.29% of total sales. 2. **Risk/impact**: High reliance on a small grou

In [None]:
###new logic with kpis

In [167]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
gemini_api_key = "AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0"
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str, max_words: int = 200) -> str:
        try:
            response = self.model.generate_content(prompt)
            # Some Gemini responses return .text or .candidates; handle safely
            text = getattr(response, "text", None) or getattr(response, "content", None) or str(response)
            words = text.strip().split()
            return " ".join(words[:max_words])
        except Exception as e:
            return f"Error generating insight: {e}"

generator = DeepInsightGenerator()

# ================= CONFIG =================
# keep EXACT as user requested
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]

# thresholds & parameters
thresholds = [25, 50, 75, 80, 90]
top_bms = 1
top_partners = 1
top_n = 1  
threshold_pct_change = 50  

kpi_prompts = {
    'Equity Sales': "Analyze Equity Sales performance.",
    'SIP Sales Achievement': "Analyze SIP Sales Achievement performance.",
    'Net Sales through MARS': "Analyze Net Sales through MARS performance.",
    'Investment Net Sales Achievement': "Analyze Investment Net Sales Achievement."
}

# ================= COLUMN MAPPING (FY24-25 latest columns you provided) =================
# Map friendly KPI names used in "extra" analysis to actual column names in dataframe
COLUMN_MAP = {
    # PMS
    "PMS AUM": "PMS AUM FY 24-25 Q4 YTD",
    "PMS Net Sales": "Net Sales FY 24-25 Q4 YTD",

    # MARS
    "MARS AUM": "MARS AUM FY 24-25 Q4 YTD",
    "MARS Net Sales": "MARS Net Sales FY 24-25 Q4 YTD",

    # Clients
    "Clients Acquired": "Clients Acquired FY 24-25 Q4 YTD",
    "Client Acquisition_% Achievement": "Client Acquisition_% Achievement",

    # Saturday School
    "Saturday School": "Saturday School (YTD) FY 24-25 Q4 YTD",
    "Investment Saturday School": "Investment Saturday School (YTD) FY 24-25 Q4 YTD",
    "Insurance Saturday School": "Insurance Saturday School (YTD) FY 24-25 Q4 YTD",

    # PRU (not provided) - user didn't provide PRU column so leave commented
    # "PRU Reviews": "Total Reviews FY 24-25 Q4 YTD",

    # AMC
    "AMC NS Target": "AMC NS Target FY 24-25",
    "AMC NS Ach": "AMC NS Ach FY 24-25",

    # LAS
    "LAS_Target": "LAS_Target",
    "LAS_Achievement": "LAS_Achievement",
    "LAS_% Achievement": "LAS_% Achievement",

    # Sales / Correlation
    "Equity Net Sales": "Equity Net Sales FY 24-25 Q4 YTD",
    "Total Net Sales": "Total Net Sales FY 24-25",
    "Net SIP": "Net SIP FY 24-25 Q4 YTD",

    # AUM
    "Total AUM": "Total AUM FY 24-25 Q4 YTD",
    "Equity AUM": "Equity AUM FY 24-25 Q4 YTD",
}

# helper to get partner column name (support both variants)
def get_partner_col(df):
    if 'Partner Name_x' in df.columns:
        return 'Partner Name_x'
    elif 'Partner Name' in df.columns:
        return 'Partner Name'
    else:
        raise KeyError("Partner Name column not found (expected 'Partner Name_x' or 'Partner Name').")

def get_bm_col(df):
    for c in ['BM','bm','Branch Manager']:
        if c in df.columns:
            return c
    raise KeyError("BM column not found (expected 'BM').")

def get_zm_col(df):
    for c in ['ZM','Zm','zm']:
        if c in df.columns:
            return c
    raise KeyError("ZM column not found (expected 'ZM').")

def get_rh_col(df):
    for c in ['Relationship Handler','RH','relationship handler']:
        if c in df.columns:
            return c
    # fallback to Doer Name if present
    if 'Doer Name' in df.columns:
        return 'Doer Name'
    raise KeyError("Relationship Handler column not found (expected 'Relationship Handler').")

# ================= HELPER FUNCTIONS =================
def safe_col(df, colname):
    """Return column name if found in df; else None"""
    return colname if colname in df.columns else None

def partner_concentration(df, col):
    """
    Returns conc_summary (dict), total_partners, and a sorted dataframe.
    col should be the actual column name in df (numeric).
    """
    partner_col = get_partner_col(df)
    if col not in df.columns:
        return {}, 0, pd.DataFrame()
    df_sorted = df.groupby(partner_col)[col].sum().sort_values(ascending=False).reset_index().rename(columns={col: 'value'})
    total_kpi = df_sorted['value'].sum()
    if total_kpi == 0 or df_sorted.empty:
        df_sorted['Cumulative KPI'] = 0
        df_sorted['Cumulative KPI %'] = 0
    else:
        df_sorted['Cumulative KPI'] = df_sorted['value'].cumsum()
        df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100
    total_partners = len(df_sorted)
    conc_summary = {}
    for t in thresholds:
        if not df_sorted.empty and total_partners>0:
            # find index (first position where cumulative >= t)
            idxs = df_sorted.index[df_sorted['Cumulative KPI %'] >= t].tolist()
            num_partners = (idxs[0] + 1) if idxs else total_partners
            conc_summary[f'Partners for {t}%'] = int(num_partners)
            conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
        else:
            conc_summary[f'Partners for {t}%'] = 0
            conc_summary[f'% of Total Partners for {t}%'] = 0.0
    return conc_summary, total_partners, df_sorted

def generate_partner_concentration_insight(df, col, level_name=None, level_value=None, prompt_header=None):
    """
    df: dataframe subset (e.g., for BM)
    col: actual column name in df (e.g., 'Net Sales FY 24-25 Q4 YTD')
    """
    conc_texts = {}
    bm_col = get_bm_col(df)
    partner_col = get_partner_col(df)

    # if BM not present (e.g., we're at BM level), use unique value
    if bm_col not in df.columns:
        bm_list = [level_value] if level_value else df[bm_col].unique().tolist()
    else:
        bm_list = df[bm_col].dropna().unique().tolist()

    for bm in bm_list:
        bm_df = df[df[bm_col] == bm] if bm_col in df.columns else df.copy()
        conc_summary, total_partners, df_sorted = partner_concentration(bm_df, col)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI Column: {col}\nTotal Partners: {total_partners}\n"
        for t in thresholds:
            text += f"- Top {conc_summary.get(f'Partners for {t}%', 0)} partners ({conc_summary.get(f'% of Total Partners for {t}%',0)}%) contribute {t}% of total {col}.\n"
        topX = df_sorted.head(top_partners)[[partner_col, 'value', 'Cumulative KPI %']] if not df_sorted.empty else pd.DataFrame()
        text += "\nTop Partner:\n"
        for _, row in topX.iterrows():
            text += f"- {row[partner_col]}: {row['value']:,.2f} ({row['Cumulative KPI %']:.2f}%)\n"
        prompt = f"""
{prompt_header or ('Analyze ' + col)}
Data:
{text}
Generate insights ONLY from the data provided — do not give generic advices.
Write insights in this exact structure:
1. Concentration level 
2. Risk/impact 
3. Action recommendation
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        conc_texts[bm] = text + "\nLLM Insight:\n" + insight
    return conc_texts

def generate_leaders_laggers_insight(df, col, level_name=None, level_value=None, prompt_header=None):
    ll_texts = {}
    bm_col = get_bm_col(df)
    partner_col = get_partner_col(df)
    if col not in df.columns:
        return {}
    # find top BM(s) by sum of col
    top_bms_list = df.groupby(bm_col)[col].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df[bm_col]==bm]
        perf = bm_df.groupby(partner_col)[col].sum().sort_values(ascending=False).reset_index().rename(columns={col:'value'})
        total_perf = perf['value'].sum() if perf['value'].sum()!=0 else 1
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders['value']/total_perf*100,2)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers['value']/total_perf*100,2)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI Column: {col}\nTop Leader:\n"
        for _, row in leaders.iterrows():
            text += f"- {row[partner_col]}: {row['value']:,.2f} ({row['% Share']}%)\n"
        text += "\nBottom Lagger:\n"
        for _, row in laggers.iterrows():
            text += f"- {row[partner_col]}: {row['value']:,.2f} ({row['% Share']}%)\n"
        prompt = f"""
{prompt_header or ('Analyze ' + col)}
Data:
{text}
Analyze the provided KPI data ONLY. Do NOT give generic advice.
Write insights for {level_name} in this structure:
1. Dependency on leader – identify top partner and quantify impact.
2. Fix for lagger – identify bottom partner and recommend improvement.
3. Impact on {level_name} – explain how top/bottom performers affect overall performance.
Limit output to 150–200 words. Use data values explicitly.
"""
        insight = generator.generate_insight(prompt)
        ll_texts[bm] = text + "\nLLM Insight:\n" + insight
    return ll_texts

def detect_drastic_changes(df, kpis_to_check=None):
    change_records = []
    if kpis_to_check is None:
        kpis_to_check = kpi_list.copy()
    df_sorted = df.sort_values([get_bm_col(df), get_partner_col(df), 'FY_Year'])
    for (bm, partner), group in df_sorted.groupby([get_bm_col(df), get_partner_col(df)]):
        group = group.sort_values('FY_Year')
        for i in range(1,len(group)):
            prev, curr = group.iloc[i-1], group.iloc[i]
            for kpi in kpis_to_check:
                # map kpi to actual column if possible (user KPI names may not be raw column names)
                actual_col = None
                # if kpi matches a column directly
                if kpi in df.columns:
                    actual_col = kpi
                else:
                    # check our mapping for equivalence
                    for friendly, actual in COLUMN_MAP.items():
                        if kpi.lower() in friendly.lower() or friendly.lower() in kpi.lower():
                            if actual in df.columns:
                                actual_col = actual
                                break
                if actual_col is None:
                    continue
                prev_val, curr_val = prev.get(actual_col, np.nan), curr.get(actual_col, np.nan)
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val==0: continue
                try:
                    pct_change = (curr_val-prev_val)/abs(prev_val)*100
                except Exception:
                    continue
                if abs(pct_change)>=threshold_pct_change:
                    change_records.append({
                        'BM': bm, 'Partner Name': partner, 'KPI': actual_col,
                        'Year From': prev.get('FY_Year'), 'Year To': curr.get('FY_Year'),
                        'Previous Value': round(prev_val,2), 'Current Value': round(curr_val,2),
                        '% Change': round(pct_change,2)
                    })
    return pd.DataFrame(change_records)

def generate_drastic_change_insight(df, level_name=None, level_value=None, kpis_to_check=None):
    change_df = detect_drastic_changes(df, kpis_to_check=kpis_to_check)
    if change_df.empty:
        return {level_value if level_value else "All": "No significant changes detected."}
    
    bm_insights = {}
    for bm, group in change_df.groupby('BM'):
        combined = f"{level_name}: {level_value if level_value else bm}\n" if level_name else f"BM: {bm}\n"
        top_partner = (
            group.groupby('Partner Name')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        for partner in top_partner:
            partner_data = group[group['Partner Name'] == partner]
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{partner_data.to_string(index=False)}
Analyze KPI changes using ONLY the provided data.
Write insights for {level_name} in this structure:
1. Largest KPI Change – identify the KPI, partner, and magnitude of change.
2. Reason & Risk – explain why this change happened based on data patterns (e.g., low base, spike/drop) and its business risk.
3. Action – give MF-sales-specific steps for {level_name} (partner engagement, mid-tier activation, continuity plans). Avoid generic advice.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            combined += f"\nPartner: {partner}\n{insight}\n" + ("-"*80) + "\n"
        bm_insights[bm] = combined
    return bm_insights

def generate_focus_area_insight(df, level_name=None, level_value=None):
    focus_records = []
    partner_col = get_partner_col(df)
    for (bm, partner), group in df.groupby([get_bm_col(df), partner_col]):
        partner_row = group.sort_values('FY_Year').iloc[-1]
        gaps = {}
        for kpi in kpi_list:
            # try to find a target column related to kpi
            target_cols = [col for col in df.columns if 'Target' in col and kpi.split()[0] in col]
            target_val = partner_row[target_cols[0]] if target_cols else np.nan
            # determine actual col for KPI: either exact match or use mapping
            actual_col = None
            if kpi in df.columns:
                actual_col = kpi
            else:
                # check mapping friendly names
                for friendly, actual in COLUMN_MAP.items():
                    if kpi.lower().split()[0] in friendly.lower() and actual in df.columns:
                        actual_col = actual
                        break
            actual_val = partner_row.get(actual_col, np.nan) if actual_col else np.nan
            if pd.isna(target_val) or target_val == 0:
                gap_pct = 0
            else:
                try:
                    gap_pct = ((target_val - actual_val)/target_val*100)
                except Exception:
                    gap_pct = 0
            gaps[kpi] = round(gap_pct,2)
        max_gap = max(gaps.values()) if gaps else 0
        sorted_gaps = sorted(gaps.items(), key=lambda x:x[1], reverse=True)
        focus_records.append({'BM':bm,'Partner Name':partner,'Focus KPIs':sorted_gaps,'Max Gap':max_gap})
    
    focus_df = pd.DataFrame(focus_records)
    if focus_df.empty:
        return {}
    top_bm = focus_df.groupby('BM')['Max Gap'].max().sort_values(ascending=False).head(top_bms).index
    filtered = focus_df[focus_df['BM'].isin(top_bm)]
    top_data = (
        filtered.groupby('BM',group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    
    bm_insights = {}
    for bm, group in top_data.groupby('BM'):
        bm_insights[bm] = {}
        for _, row in group.iterrows():
            partner = row['Partner Name']
            focus_kpis = row['Focus KPIs']
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{focus_kpis}
Analyze KPIs and gaps using ONLY the provided data.
Write insights for {level_name} in this structure:
1. KPIs to fix – identify top underperforming KPIs per partner.
2. Improvement required – quantify the gap and suggest realistic targets.
3. Action plan – provide specific steps for {level_name} to improve performance.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            bm_insights[bm][partner] = insight
    return bm_insights

# ================= EXTRA HELPER FUNCTIONS (participation, active partners, correlation) =================
def active_partners(df, actual_col):
    """Count unique partners with actual_col > 0"""
    partner_col = get_partner_col(df)
    if actual_col not in df.columns:
        return 0
    return int(df[df[actual_col] > 0][partner_col].nunique())

def participation_rate(df, actual_col):
    partner_col = get_partner_col(df)
    total = df[partner_col].nunique() if partner_col in df.columns else 0
    if total == 0 or actual_col not in df.columns:
        return 0.0
    active = df[df[actual_col] > 0][partner_col].nunique()
    return round(active / total * 100, 2)

def calculate_correlation(df, col1, col2):
    if col1 not in df.columns or col2 not in df.columns:
        return None
    s = df[[col1, col2]].dropna()
    if s.empty:
        return None
    try:
        return round(s[col1].corr(s[col2]), 3)
    except Exception:
        return None

def generic_concentration(df, actual_col):
    return partner_concentration(df, actual_col)

def generate_custom_insight(title, df, value_dict):
    text = f"{title}\n"
    for k, v in value_dict.items():
        text += f"- {k}: {v}\n"
    prompt = f"""
Analyze ONLY the data provided:
{text}
Write insights in 120–180 words with:
1. What the numbers indicate  
2. Risk/impact  
3. Action items  
Use only the data.
"""
    insight = generator.generate_insight(prompt)
    return text + "\nLLM Insight:\n" + insight

# ========== MAIN EXECUTION ==========
# Assumption: `final_df` is already loaded in the environment as a pandas DataFrame
# and includes columns: 'FY_Year', 'ZM', 'BM', 'Relationship Handler' (or alternatives)
# The code is defensive and will raise readable errors if core columns are missing.

final_insights = {}

# basic checks
if 'FY_Year' not in globals() and 'FY_Year' not in locals():
    # FY_Year should be a column in final_df, not a global var - just warning here
    pass

# Example: final_df must exist
try:
    final_df
except NameError:
    raise NameError("final_df not found. Please load your merged dataframe into the variable `final_df` and run again.")

# normalize column names (strip trailing spaces)
final_df.columns = [c.strip() if isinstance(c, str) else c for c in final_df.columns]

ZM_COL = get_zm_col(final_df)
BM_COL = get_bm_col(final_df)
RH_COL = get_rh_col(final_df)
PARTNER_COL = get_partner_col(final_df)

# Build list of additional KPI actual columns to check in drastic-change routine
additional_actual_kpis = []
for friendly, actual in COLUMN_MAP.items():
    if actual in final_df.columns:
        additional_actual_kpis.append(actual)

# combine kpis for drastic detection (keep the original kpi_list names as placeholders + mapped actuals)
kpis_for_drastic = additional_actual_kpis.copy()
# Also attempt to include direct matches for kpi_list names if they exist as columns
for k in kpi_list:
    if k in final_df.columns:
        kpis_for_drastic.append(k)

# identify top ZM(s) based on sum of selected KPI columns (use Equity Net Sales + Total Net Sales if available)
# fallback to ZM grouping by Equity Net Sales (if present) otherwise sum of all numeric
if COLUMN_MAP.get("Equity Net Sales") in final_df.columns:
    top_zms = final_df.groupby(ZM_COL)[COLUMN_MAP["Equity Net Sales"]].sum().sort_values(ascending=False).head(1).index
else:
    # fallback: sum numeric columns per ZM
    numeric_cols = final_df.select_dtypes(include=[np.number]).columns.tolist()
    if numeric_cols:
        top_zms = final_df.groupby(ZM_COL)[numeric_cols].sum().sum(axis=1).sort_values(ascending=False).head(1).index
    else:
        top_zms = final_df[ZM_COL].dropna().unique()[:1]

for zm in top_zms:
    zm_group = final_df[final_df[ZM_COL] == zm]
    final_insights[zm] = {}

    # TOP 1 BM under this ZM (by sum of Equity Net Sales if available else numeric fallback)
    if COLUMN_MAP.get("Equity Net Sales") in zm_group.columns:
        top_bm = zm_group.groupby(BM_COL)[COLUMN_MAP["Equity Net Sales"]].sum().sort_values(ascending=False).head(1).index
    else:
        numeric_cols = zm_group.select_dtypes(include=[np.number]).columns.tolist()
        if numeric_cols:
            top_bm = zm_group.groupby(BM_COL)[numeric_cols].sum().sum(axis=1).sort_values(ascending=False).head(1).index
        else:
            top_bm = zm_group[BM_COL].dropna().unique()[:1]

    bm_df = zm_group[zm_group[BM_COL].isin(top_bm)]

    # Keep original KPI analysis (existing engine) - we map user's kpi_list to actual columns where possible
    final_insights[zm]['ZM_Overview'] = {}
    for kpi in kpi_list:
        # attempt to map kpi to an actual column if it exists in the dataframe
        actual_col = kpi if kpi in bm_df.columns else None
        # If a mapping exists in COLUMN_MAP whose friendly name contains kpi's first word, use it
        if actual_col is None:
            for friendly, actual in COLUMN_MAP.items():
                if kpi.split()[0].lower() in friendly.lower() and actual in bm_df.columns:
                    actual_col = actual
                    break
        # default to existing kpi string column (may be present in other datasets)
        final_insights[zm]['ZM_Overview'][kpi] = {
            'Partner Concentration': generate_partner_concentration_insight(bm_df, actual_col or kpi, level_name="ZM", level_value=zm, prompt_header=kpi_prompts.get(kpi)),
            'Leaders & Laggers': generate_leaders_laggers_insight(bm_df, actual_col or kpi, level_name="ZM", level_value=zm, prompt_header=kpi_prompts.get(kpi)),
            'Drastic Change': generate_drastic_change_insight(bm_df, level_name="ZM", level_value=zm, kpis_to_check=[actual_col] if actual_col else None),
            'Areas to Focus': generate_focus_area_insight(bm_df, level_name="ZM", level_value=zm)
        }

    # ========== EXTRA METRICS (PMS, MARS, Clients, Saturday School, AMC, LAS, Correlations) ==========
    extra_outputs = {}

    # PMS
    pms_col = COLUMN_MAP.get("PMS Net Sales")
    extra_outputs["PMS"] = {
        "Active Partners": active_partners(bm_df, pms_col) if pms_col else 0,
        "Concentration": generic_concentration(bm_df, pms_col)[0] if pms_col else {}
    }

    # MARS
    mars_col = COLUMN_MAP.get("MARS Net Sales")
    extra_outputs["MARS"] = {
        "Active Partners": active_partners(bm_df, mars_col) if mars_col else 0,
        "Concentration": generic_concentration(bm_df, mars_col)[0] if mars_col else {}
    }

    # Client Acquisition
    clients_col = COLUMN_MAP.get("Clients Acquired")
    extra_outputs["Client Acquisition"] = {
        "Active Partners (with CA>0)": active_partners(bm_df, clients_col) if clients_col else 0,
        "Concentration": generic_concentration(bm_df, clients_col)[0] if clients_col else {}
    }

    # Saturday School
    sat_col = COLUMN_MAP.get("Saturday School")
    inv_sat_col = COLUMN_MAP.get("Investment Saturday School")
    ins_sat_col = COLUMN_MAP.get("Insurance Saturday School")
    extra_outputs["Saturday School"] = {
        "Participation % (Total)": participation_rate(bm_df, sat_col) if sat_col else 0,
        "Participation % (Investment)": participation_rate(bm_df, inv_sat_col) if inv_sat_col else 0,
        "Participation % (Insurance)": participation_rate(bm_df, ins_sat_col) if ins_sat_col else 0,
        "Concentration (Total)": generic_concentration(bm_df, sat_col)[0] if sat_col else {},
        "Correlation with Clients Acquired": calculate_correlation(bm_df, sat_col, clients_col) if sat_col and clients_col else None,
        "Correlation with Equity Net Sales": calculate_correlation(bm_df, sat_col, COLUMN_MAP.get("Equity Net Sales")) if sat_col and COLUMN_MAP.get("Equity Net Sales") in bm_df.columns else None
    }

    # AMC
    amc_ach_col = COLUMN_MAP.get("AMC NS Ach")
    amc_target_col = COLUMN_MAP.get("AMC NS Target")
    extra_outputs["AMC"] = {
        "Total AMC Ach (sum)": float(bm_df[amc_ach_col].sum()) if amc_ach_col and amc_ach_col in bm_df.columns else 0.0,
        "Concentration": generic_concentration(bm_df, amc_ach_col)[0] if amc_ach_col else {},
        "Participation % (AMC Ach>0)": participation_rate(bm_df, amc_ach_col) if amc_ach_col else 0
    }

    # LAS
    las_col = COLUMN_MAP.get("LAS_Achievement")
    extra_outputs["LAS"] = {
        "Participation % (LAS>0)": participation_rate(bm_df, las_col) if las_col else 0,
        "Concentration": generic_concentration(bm_df, las_col)[0] if las_col else {}
    }

    # Correlations (PRU not provided so skipped)
    extra_outputs["Correlations"] = {
        "Saturday School ↔ Clients Acquired": calculate_correlation(bm_df, sat_col, clients_col) if sat_col and clients_col else None,
        "Clients Acquired ↔ Equity Net Sales": calculate_correlation(bm_df, clients_col, COLUMN_MAP.get("Equity Net Sales")) if clients_col and COLUMN_MAP.get("Equity Net Sales") in bm_df.columns else None,
        "MARS Net Sales ↔ Equity Net Sales": calculate_correlation(bm_df, mars_col, COLUMN_MAP.get("Equity Net Sales")) if mars_col and COLUMN_MAP.get("Equity Net Sales") in bm_df.columns else None
    }

    # Save extra outputs under ZM
    final_insights[zm]['EXTRA'] = {}
    for key, metrics in extra_outputs.items():
        final_insights[zm]['EXTRA'][key] = generate_custom_insight(key, bm_df, metrics)

    # ========== BM LEVEL ==========
    for bm in top_bm:
        bm_group = zm_group[zm_group[BM_COL] == bm]
        final_insights[zm][bm] = {}

        for kpi in kpi_list:
            actual_col = kpi if kpi in bm_group.columns else None
            if actual_col is None:
                for friendly, actual in COLUMN_MAP.items():
                    if kpi.split()[0].lower() in friendly.lower() and actual in bm_group.columns:
                        actual_col = actual
                        break
            final_insights[zm][bm][kpi] = {
                'Partner Concentration': generate_partner_concentration_insight(bm_group, actual_col or kpi, level_name="BM", level_value=bm, prompt_header=kpi_prompts.get(kpi)),
                'Leaders & Laggers': generate_leaders_laggers_insight(bm_group, actual_col or kpi, level_name="BM", level_value=bm, prompt_header=kpi_prompts.get(kpi)),
                'Drastic Change': generate_drastic_change_insight(bm_group, level_name="BM", level_value=bm, kpis_to_check=[actual_col] if actual_col else None),
                'Areas to Focus': generate_focus_area_insight(bm_group, level_name="BM", level_value=bm)
            }

        # TOP 1 RH
        # compute top RH by sum of Equity Net Sales if available else numeric fallback
        if COLUMN_MAP.get("Equity Net Sales") in bm_group.columns:
            top_rh = bm_group.groupby(RH_COL)[COLUMN_MAP["Equity Net Sales"]].sum().sort_values(ascending=False).head(1).index
        else:
            numeric_cols = bm_group.select_dtypes(include=[np.number]).columns.tolist()
            if numeric_cols:
                top_rh = bm_group.groupby(RH_COL)[numeric_cols].sum().sum(axis=1).sort_values(ascending=False).head(1).index
            else:
                top_rh = bm_group[RH_COL].dropna().unique()[:1]

        for rh in top_rh:
            rh_group = bm_group[bm_group[RH_COL] == rh]
            final_insights[zm][bm][rh] = {}
            for kpi in kpi_list:
                actual_col = kpi if kpi in rh_group.columns else None
                if actual_col is None:
                    for friendly, actual in COLUMN_MAP.items():
                        if kpi.split()[0].lower() in friendly.lower() and actual in rh_group.columns:
                            actual_col = actual
                            break
                final_insights[zm][bm][rh][kpi] = {
                    'Partner Concentration': generate_partner_concentration_insight(rh_group, actual_col or kpi, level_name="RH", level_value=rh, prompt_header=kpi_prompts.get(kpi)),
                    'Leaders & Laggers': generate_leaders_laggers_insight(rh_group, actual_col or kpi, level_name="RH", level_value=rh, prompt_header=kpi_prompts.get(kpi)),
                    'Drastic Change': generate_drastic_change_insight(rh_group, level_name="RH", level_value=rh, kpis_to_check=[actual_col] if actual_col else None),
                    'Areas to Focus': generate_focus_area_insight(rh_group, level_name="RH", level_value=rh)
                }

# ========== SAVE or RETURN final_insights ==========
# final_insights now contains:
# - ZM overview for original KPIs
# - EXTRA analysis for PMS, MARS, Saturday School, AMC, LAS, Correlations
# - BM and RH level analyses for the original KPIs

# You can now inspect final_insights, or write it to a JSON file:
import json
with open("final_insights.json", "w", encoding="utf-8") as f:
    json.dump(final_insights, f, default=str, indent=2)

print("Done. final_insights written to final_insights.json")


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Done. final_insights written to final_insights.json


In [168]:
import json

print(json.dumps(final_insights, indent=4))
def pretty_print_insights(insights, level=0):
    indent = "    " * level
    for key, value in insights.items():
        if isinstance(value, dict):
            print(f"\n{indent}{key}:")
            pretty_print_insights(value, level + 1)
        else:
            print(f"{indent}- {key}: {value}")  

pretty_print_insights(final_insights)


{
    "SARFARAZ ABDULLA PATEL": {
        "ZM_Overview": {
            "Equity Sales": {
                "Partner Concentration": {
                    "SAMEER PAKALE": "ZM: SARFARAZ ABDULLA PATEL\nKPI Column: Equity Sales\nTotal Partners: 429\n- Top 6 partners (1.4%) contribute 25% of total Equity Sales.\n- Top 18 partners (4.2%) contribute 50% of total Equity Sales.\n- Top 46 partners (10.72%) contribute 75% of total Equity Sales.\n- Top 54 partners (12.59%) contribute 80% of total Equity Sales.\n- Top 82 partners (19.11%) contribute 90% of total Equity Sales.\n\nTop Partner:\n- Vasant Rangnath Kute: 712,456,939.45 (7.06%)\n\nLLM Insight:\nHere's an analysis of Sarfaraz Abdulla Patel's Equity Sales performance based on the provided data: 1. **Concentration level:** A significant portion of equity sales is concentrated within a small percentage of partners. The top 1.4% of partners contribute 25% of total sales, and the top 19.11% generate 90% of sales. Vasant Rangnath Kute, the top p

In [169]:
pretty_print_insights(final_insights)


SARFARAZ ABDULLA PATEL:

    ZM_Overview:

        Equity Sales:

            Partner Concentration:
                - SAMEER PAKALE: ZM: SARFARAZ ABDULLA PATEL
KPI Column: Equity Sales
Total Partners: 429
- Top 6 partners (1.4%) contribute 25% of total Equity Sales.
- Top 18 partners (4.2%) contribute 50% of total Equity Sales.
- Top 46 partners (10.72%) contribute 75% of total Equity Sales.
- Top 54 partners (12.59%) contribute 80% of total Equity Sales.
- Top 82 partners (19.11%) contribute 90% of total Equity Sales.

Top Partner:
- Vasant Rangnath Kute: 712,456,939.45 (7.06%)

LLM Insight:
Here's an analysis of Sarfaraz Abdulla Patel's Equity Sales performance based on the provided data: 1. **Concentration level:** A significant portion of equity sales is concentrated within a small percentage of partners. The top 1.4% of partners contribute 25% of total sales, and the top 19.11% generate 90% of sales. Vasant Rangnath Kute, the top partner, contributes a substantial 7.06% of the t

In [178]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
gemini_api_key = "AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0"
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str, max_words: int = 200) -> str:
        try:
            response = self.model.generate_content(prompt)
            words = response.text.strip().split()
            return " ".join(words[:max_words])
        except Exception as e:
            return f"Error generating insight: {e}"

generator = DeepInsightGenerator()

# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
thresholds = [25, 50, 75, 80, 90]

top_bms = 1
top_partners = 1
top_n = 1  
threshold_pct_change = 50  

kpi_prompts = {
    'Equity Sales': "Analyze Equity Sales performance.",
    'SIP Sales Achievement': "Analyze SIP Sales Achievement performance.",
    'Net Sales through MARS': "Analyze Net Sales through MARS performance.",
    'Investment Net Sales Achievement': "Analyze Investment Net Sales Achievement."
}

# ================= CLIENT REQUIREMENT CALCULATION FUNCTIONS =================

def calculate_active_partners(df, kpi_column):
    """Calculate number of active partners (non-zero values)"""
    if kpi_column not in df.columns:
        return None
    
    active_partners = df[df[kpi_column] > 0]['Partner Name_x'].nunique()
    total_partners = df['Partner Name_x'].nunique()
    participation_rate = (active_partners / total_partners * 100) if total_partners > 0 else 0
    return {
        'Active Partners': active_partners,
        'Total Partners': total_partners,
        'Participation Rate %': round(participation_rate, 2)
    }

def calculate_concentration_metrics(df, kpi_column):
    """Calculate concentration metrics like HHI, Top 20% share, etc."""
    if kpi_column not in df.columns:
        return None
    
    partner_totals = df.groupby('Partner Name_x')[kpi_column].sum()
    total_kpi = partner_totals.sum()
    
    if total_kpi == 0 or len(partner_totals) == 0:
        return {
            'HHI Index': 0,
            'Top 10% Share %': 0,
            'Top 20% Share %': 0,
            'Gini Coefficient': 0
        }
    
    # Calculate HHI (Herfindahl-Hirschman Index)
    market_shares = (partner_totals / total_kpi * 100)
    hhi = (market_shares ** 2).sum() / 10000  # Normalize to 0-1
    
    # Calculate Top X% shares
    sorted_partners = partner_totals.sort_values(ascending=False)
    cumulative_percentage = (sorted_partners.cumsum() / total_kpi * 100).reset_index()
    
    top_10_threshold = max(1, int(len(sorted_partners) * 0.1))
    top_20_threshold = max(1, int(len(sorted_partners) * 0.2))
    
    top_10_share = sorted_partners.head(top_10_threshold).sum() / total_kpi * 100
    top_20_share = sorted_partners.head(top_20_threshold).sum() / total_kpi * 100
    
    # Simple Gini coefficient calculation
    values = sorted_partners.values
    n = len(values)
    if n > 0 and np.sum(values) > 0:
        index = np.arange(1, n + 1)
        gini = (np.sum((2 * index - n - 1) * values)) / (n * np.sum(values))
    else:
        gini = 0
    
    return {
        'HHI Index': round(hhi, 4),
        'Top 10% Share %': round(top_10_share, 2),
        'Top 20% Share %': round(top_20_share, 2),
        'Gini Coefficient': round(gini, 4)
    }

def calculate_correlation_analysis(df, x_column, y_column):
    """Calculate correlation between two metrics (e.g., Saturday School vs Client Acquisition)"""
    if x_column not in df.columns or y_column not in df.columns:
        return None
    
    # Filter out zeros and nulls
    valid_data = df[[x_column, y_column]].dropna()
    valid_data = valid_data[(valid_data[x_column] > 0) & (valid_data[y_column] > 0)]
    
    if len(valid_data) < 2:
        return {
            'Correlation Coefficient': 0,
            'Sample Size': len(valid_data),
            'Interpretation': 'Insufficient data'
        }
    
    correlation = valid_data[x_column].corr(valid_data[y_column])
    
    # Interpretation
    if abs(correlation) >= 0.7:
        strength = "Strong"
    elif abs(correlation) >= 0.3:
        strength = "Moderate"
    else:
        strength = "Weak"
    
    direction = "positive" if correlation > 0 else "negative"
    
    return {
        'Correlation Coefficient': round(correlation, 4),
        'Sample Size': len(valid_data),
        'Strength': strength,
        'Direction': direction,
        'Interpretation': f"{strength} {direction} correlation"
    }

def calculate_pms_analysis(df):
    """Specific analysis for PMS requirements"""
    results = {}
    
    # For both years
    pms_metrics = ['PMS AUM FY 23-24 YTD', 'PMS AUM FY 24-25 Q4 YTD',
                   'Net NJ PMS Sales', 'Net Non-NJ PMS Sales']
    
    for metric in pms_metrics:
        active_info = calculate_active_partners(df, metric)
        conc_info = calculate_concentration_metrics(df, metric)
        if active_info or conc_info:
            results[metric] = {
                'Active Partners Analysis': active_info,
                'Concentration Analysis': conc_info
            }
    
    return results if results else None

def calculate_mars_analysis(df):
    """Specific analysis for MARS requirements"""
    results = {}
    
    # Check multiple MARS metrics
    mars_metrics = ['MARS AUM FY 23-24 YTD', 'MARS AUM FY 24-25 Q4 YTD', 
                    'MARS Net Sales FY 23-24 YTD', 'MARS Net Sales FY 24-25 Q4 YTD',
                    'Net Sales through MARS']
    
    for metric in mars_metrics:
        active_info = calculate_active_partners(df, metric)
        conc_info = calculate_concentration_metrics(df, metric)
        if active_info or conc_info:
            results[metric] = {
                'Active Partners Analysis': active_info,
                'Concentration Analysis': conc_info
            }
    
    return results if results else None

def calculate_saturday_school_analysis(df):
    """Specific analysis for Saturday School requirements"""
    results = {}
    
    # 1. Concentration of Partners
    sat_school_metrics = ['Saturday School (YTD) FY 23-24 Q4 YTD', 
                         'Saturday School (YTD) FY 24-25 Q4 YTD',
                         'Investment\nSaturday School (YTD) FY 23-24 Q4 YTD',
                         'Investment\nSaturday School (YTD) FY 24-25 Q4 YTD',
                         'Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD',
                         'Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD']
    
    for metric in sat_school_metrics:
        conc_info = calculate_concentration_metrics(df, metric)
        active_info = calculate_active_partners(df, metric)
        if conc_info or active_info:
            results[f'{metric}_Concentration'] = conc_info
            results[f'{metric}_Participation'] = active_info
    
    # 2. Correlation to Client Acquisition
    client_acq_metrics = ['Clients Acquired FY 23-24 YTD', 
                         'Clients Acquired FY 24-25 Q4 YTD',
                         'Client Acquisition_Achievement',
                         'Client Acquisition_% Achievement',
                         'Client Acquisition% Achievement']
    
    for sat_metric in sat_school_metrics:
        for client_metric in client_acq_metrics:
            if sat_metric in df.columns and client_metric in df.columns:
                corr_info = calculate_correlation_analysis(df, sat_metric, client_metric)
                if corr_info:
                    results[f'Correlation_{sat_metric}_vs_{client_metric}'] = corr_info
    
    return results if results else None

def calculate_pru_usage_analysis(df):
    """Analysis for PRU Usage"""
    results = {}
    
    pru_metrics = ['Total AUM FY 23-24 Q4 YTD', 'Total AUM FY 24-25 Q4 YTD',
                  'Equity+ Hyb AUM FY 23-24 Q4 YTD', 'Equity AUM FY 24-25 Q4 YTD']
    
    for metric in pru_metrics:
        participation = calculate_active_partners(df, metric)
        concentration = calculate_concentration_metrics(df, metric)
        if participation or concentration:
            results[metric] = {
                'Participation': participation,
                'Concentration': concentration
            }
    
    return results if results else None

def calculate_nj_amc_analysis(df):
    """Analysis for NJ AMC Net Sales Insights"""
    results = {}
    
    amc_metrics = ['AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr)', 
                  'AMC NS Ach\nFY 24-25 Q4 YTD',
                  'AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr)',
                  'AMC NS Target\nFY 24-25 Q4 YTD']
    
    for metric in amc_metrics:
        if metric in df.columns:
            # Calculate concentration and participation
            participation = calculate_active_partners(df, metric)
            concentration = calculate_concentration_metrics(df, metric)
            
            metric_result = {}
            if participation:
                metric_result['Participation'] = participation
            if concentration:
                metric_result['Concentration'] = concentration
            
            # Calculate achievement rate if target exists
            if 'Target' in metric:
                ach_metric = metric.replace('Target', 'Ach')
                if ach_metric in df.columns:
                    total_ach = df[ach_metric].sum()
                    total_target = df[metric].sum()
                    achievement_rate = (total_ach / total_target * 100) if total_target > 0 else 0
                    
                    metric_result['Achievement'] = {
                        'Total Achievement': round(total_ach, 2),
                        'Total Target': round(total_target, 2),
                        'Achievement Rate %': round(achievement_rate, 2)
                    }
            
            if metric_result:
                results[metric] = metric_result
    
    return results if results else None

def calculate_non_pru_analysis(df):
    """Analysis for Non-PRU Usage"""
    results = {}
    
    non_pru_metrics = ['Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr)',
                      'Non-NJ AUM\nFY 24-25 Q4 YTD']
    
    for metric in non_pru_metrics:
        if metric in df.columns:
            participation = calculate_active_partners(df, metric)
            concentration = calculate_concentration_metrics(df, metric)
            
            metric_result = {}
            if participation:
                metric_result['Participation'] = participation
            if concentration:
                metric_result['Concentration'] = concentration
            
            # Compare with PRU usage if possible
            pru_metric = metric.replace('Non-NJ', 'Total')
            if pru_metric in df.columns:
                non_pru_total = df[metric].sum()
                pru_total = df[pru_metric].sum()
                non_pru_share = (non_pru_total / (non_pru_total + pru_total) * 100) if (non_pru_total + pru_total) > 0 else 0
                
                metric_result['Comparison'] = {
                    'Non-PRU Share %': round(non_pru_share, 2),
                    'Non-PRU Total': round(non_pru_total, 2),
                    'PRU Total': round(pru_total, 2)
                }
            
            if metric_result:
                results[metric] = metric_result
    
    return results if results else None

def calculate_las_analysis(df):
    """Analysis for LAS"""
    results = {}
    
    las_metrics = ['LAS_Achievement', 'LAS_% Achievement', 'LAS% Achievement']
    
    for metric in las_metrics:
        if metric in df.columns:
            participation = calculate_active_partners(df, metric)
            concentration = calculate_concentration_metrics(df, metric)
            
            metric_result = {}
            if participation:
                metric_result['Participation'] = participation
            if concentration:
                metric_result['Concentration'] = concentration
            
            if metric_result:
                results[metric] = metric_result
    
    return results if results else None

def calculate_client_acquisition_analysis(df):
    """Analysis for Client Acquisition & Equity Net Sales"""
    results = {}
    
    # Client Acquisition concentration
    client_metrics = ['Clients Acquired FY 23-24 YTD', 
                     'Clients Acquired FY 24-25 Q4 YTD',
                     'Client Acquisition_Achievement',
                     'Client Acquisition_% Achievement',
                     'Client Acquisition% Achievement']
    
    for metric in client_metrics:
        if metric in df.columns:
            concentration = calculate_concentration_metrics(df, metric)
            if concentration:
                results[f'{metric}_Concentration'] = concentration
    
    # Equity Net Sales check
    equity_metrics = ['Equity Net Sales\nFY 23-24 YTD',
                     'Equity Net Sales\nFY 24-25 Q4 YTD',
                     'Equity Net Sales_Achievement',
                     'Equity Net Sales_% Achievement',
                     'Equity Net Sales% Achievement',
                     'Equity Sales']
    
    for metric in equity_metrics:
        if metric in df.columns:
            stats = {
                'Total': round(df[metric].sum(), 2),
                'Average per Partner': round(df[metric].mean(), 2),
                'Max': round(df[metric].max(), 2),
                'Min': round(df[metric].min(), 2),
                'Std Dev': round(df[metric].std(), 2)
            }
            results[f'{metric}_Stats'] = stats
    
    return results if results else None

def generate_client_requirements_insights(df, level_name=None, level_value=None):
    """Generate insights for all client requirements"""
    insights = {}
    
    # 1. PMS Analysis
    pms_results = calculate_pms_analysis(df)
    if pms_results:
        insights['PMS_Analysis'] = pms_results
    
    # 2. MARS Analysis
    mars_results = calculate_mars_analysis(df)
    if mars_results:
        insights['MARS_Analysis'] = mars_results
    
    # 3. Saturday School Analysis
    sat_results = calculate_saturday_school_analysis(df)
    if sat_results:
        insights['Saturday_School_Analysis'] = sat_results
    
    # 4. PRU Usage Analysis
    pru_results = calculate_pru_usage_analysis(df)
    if pru_results:
        insights['PRU_Usage_Analysis'] = pru_results
    
    # 5. NJ AMC Analysis
    amc_results = calculate_nj_amc_analysis(df)
    if amc_results:
        insights['NJ_AMC_Analysis'] = amc_results
    
    # 6. Non-PRU Usage Analysis
    non_pru_results = calculate_non_pru_analysis(df)
    if non_pru_results:
        insights['Non_PRU_Analysis'] = non_pru_results
    
    # 7. LAS Analysis
    las_results = calculate_las_analysis(df)
    if las_results:
        insights['LAS_Analysis'] = las_results
    
    # 8. Client Acquisition & Equity Net Sales
    client_results = calculate_client_acquisition_analysis(df)
    if client_results:
        insights['Client_Acquisition_Equity_Analysis'] = client_results
    
    # Generate LLM insights for each analysis
    llm_insights = {}
    for analysis_name, analysis_data in insights.items():
        prompt = f"""
        Analysis Type: {analysis_name}
        Level: {level_name} - {level_value}
        
        Based on the following calculated metrics, provide insights:
        {analysis_data}
        
        Provide insights in this structure:
        1. Key Findings (summary of the data)
        2. Business Implications (what this means for the business)
        3. Recommendations (specific actions to take)
        
        Keep it concise and data-driven. Use the actual numbers from the data.
        """
        
        llm_insight = generator.generate_insight(prompt, max_words=250)
        llm_insights[analysis_name] = {
            'Calculated Metrics': analysis_data,
            'LLM Insight': llm_insight
        }
    
    return llm_insights if llm_insights else None

# ================= EXISTING HELPER FUNCTIONS =================
def partner_concentration(df, kpi):
    df_sorted = df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100 if total_kpi != 0 else 0
    total_partners = len(df_sorted)
    conc_summary = {}
    for t in thresholds:
        if not df_sorted.empty:
            num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
            conc_summary[f'Partners for {t}%'] = num_partners
            conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
    return conc_summary, total_partners, df_sorted

def generate_partner_concentration_insight(df, kpi, level_name=None, level_value=None):
    conc_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        conc_summary, total_partners, df_sorted = partner_concentration(bm_df, kpi)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTotal Partners: {total_partners}\n"
        for t in thresholds:
            text += f"- Top {conc_summary.get(f'Partners for {t}%', 0)} partners ({conc_summary.get(f'% of Total Partners for {t}%',0)}%) contribute {t}% of total {kpi}.\n"
        topX = df_sorted.head(top_partners)[['Partner Name_x', kpi, 'Cumulative KPI %']]
        text += "\nTop Partner:\n"
        for _, row in topX.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Generate insights ONLY from the data provided — do not give generic advices.
Write insights in this exact structure:
1. Concentration level 
2. Risk/impact 
3. Action recommendation Limit to 200 words
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        conc_texts[bm] = text + "\nLLM Insight:\n" + insight
    return conc_texts

def generate_leaders_laggers_insight(df, kpi, level_name=None, level_value=None):
    ll_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        perf = bm_df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
        total_perf = perf[kpi].sum() if perf[kpi].sum()!=0 else 1
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi]/total_perf*100,2)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi]/total_perf*100,2)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTop Leader:\n"
        for _, row in leaders.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        text += "\nBottom Lagger:\n"
        for _, row in laggers.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Analyze the provided KPI data ONLY. Do NOT give generic advice.
Write insights for {level_name} in this structure:
1. Dependency on leader – identify top partner and quantify impact.
2. Fix for lagger – identify bottom partner and recommend improvement.
3. Impact on {level_name} – explain how top/bottom performers affect overall performance.
Limit output to 150–200 words. Use data values explicitly.
"""
        insight = generator.generate_insight(prompt)
        ll_texts[bm] = text + "\nLLM Insight:\n" + insight
    return ll_texts

def detect_drastic_changes(df):
    change_records = []
    df_sorted = df.sort_values(['BM', 'Partner Name_x', 'FY_Year'])
    for (bm, partner), group in df_sorted.groupby(['BM','Partner Name_x']):
        group = group.sort_values('FY_Year')
        for i in range(1,len(group)):
            prev, curr = group.iloc[i-1], group.iloc[i]
            for kpi in kpi_list:
                prev_val, curr_val = prev[kpi], curr[kpi]
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val==0: continue
                pct_change = (curr_val-prev_val)/abs(prev_val)*100
                if abs(pct_change)>=threshold_pct_change:
                    change_records.append({
                        'BM': bm, 'Partner Name_x': partner, 'KPI': kpi,
                        'Year From': prev['FY_Year'], 'Year To': curr['FY_Year'],
                        'Previous Value': round(prev_val,2), 'Current Value': round(curr_val,2),
                        '% Change': round(pct_change,2)
                    })
    return pd.DataFrame(change_records)

def generate_drastic_change_insight(df, level_name=None, level_value=None):
    change_df = detect_drastic_changes(df)
    if change_df.empty:
        return {level_value if level_value else "All": "No significant changes detected."}
    
    bm_insights = {}
    for bm, group in change_df.groupby('BM'):
        combined = f"{level_name}: {level_value if level_value else bm}\n" if level_name else f"BM: {bm}\n"
        top_partner = (
            group.groupby('Partner Name_x')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        for partner in top_partner:
            partner_data = group[group['Partner Name_x'] == partner]
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{partner_data}
Analyze KPI changes using ONLY the provided data.
Write insights for {level_name} in this structure:
1. Largest KPI Change – identify the KPI, partner, and magnitude of change.
2. Reason & Risk – explain why this change happened based on data patterns (e.g., low base, spike/drop) and its business risk.
3. Action – give MF-sales-specific steps for {level_name} (partner engagement, mid-tier activation, continuity plans). Avoid generic advice.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            combined += f"\nPartner: {partner}\n{insight}\n" + ("-"*80) + "\n"
        bm_insights[bm] = combined
    return bm_insights

def generate_focus_area_insight(df, level_name=None, level_value=None):
    focus_records = []
    for (bm, partner), group in df.groupby(['BM','Partner Name_x']):
        partner_row = group.iloc[-1]
        gaps = {}
        for kpi in kpi_list:
            target_cols = [col for col in df.columns if 'Target' in col and kpi.split()[0] in col]
            target_val = partner_row[target_cols[0]] if target_cols else np.nan
            gap_pct = ((target_val - partner_row[kpi])/target_val*100 if target_val not in [0, None, np.nan] else 0)
            gaps[kpi] = round(gap_pct,2)
        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x:x[1], reverse=True)
        focus_records.append({'BM':bm,'Partner Name_x':partner,'Focus KPIs':sorted_gaps,'Max Gap':max_gap})
    
    focus_df = pd.DataFrame(focus_records)
    top_bm = focus_df.groupby('BM')['Max Gap'].max().sort_values(ascending=False).head(top_bms).index
    filtered = focus_df[focus_df['BM'].isin(top_bm)]
    top_data = (
        filtered.groupby('BM',group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    
    bm_insights = {}
    for bm, group in top_data.groupby('BM'):
        bm_insights[bm] = {}
        for _, row in group.iterrows():
            partner = row['Partner Name_x']
            focus_kpis = row['Focus KPIs']
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{focus_kpis}
Analyze KPIs and gaps using ONLY the provided data.
Write insights for {level_name} in this structure:
1. KPIs to fix – identify top underperforming KPIs per partner.
2. Improvement required – quantify the gap and suggest realistic targets.
3. Action plan – provide specific steps for {level_name} to improve performance.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            bm_insights[bm][partner] = insight
    return bm_insights

# ========== UPDATED MAIN EXECUTION ==========
final_insights = {}

top_zms = final_df.groupby('ZM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index

for zm in top_zms:
    zm_group = final_df[final_df['ZM'] == zm]
    final_insights[zm] = {}

    # TOP 1 BM under this ZM
    top_bm = zm_group.groupby('BM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index
    bm_df = zm_group[zm_group['BM'].isin(top_bm)]

    # ADD CLIENT REQUIREMENTS ANALYSIS AT ZM LEVEL
    client_req_insights = generate_client_requirements_insights(
        bm_df, level_name="ZM", level_value=zm
    )
    if client_req_insights:
        final_insights[zm]['Client_Requirements_Analysis'] = client_req_insights

    final_insights[zm]['ZM_Overview'] = {}
    for kpi in kpi_list:
        final_insights[zm]['ZM_Overview'][kpi] = {
            'Partner Concentration': generate_partner_concentration_insight(bm_df, kpi, level_name="ZM", level_value=zm),
            'Leaders & Laggers': generate_leaders_laggers_insight(bm_df, kpi, level_name="ZM", level_value=zm),
            'Drastic Change': generate_drastic_change_insight(bm_df, level_name="ZM", level_value=zm),
            'Areas to Focus': generate_focus_area_insight(bm_df, level_name="ZM", level_value=zm)
        }

    # BM LEVEL
    for bm in top_bm:
        bm_group = zm_group[zm_group['BM'] == bm]
        final_insights[zm][bm] = {}

        # ADD CLIENT REQUIREMENTS ANALYSIS AT BM LEVEL
        client_req_insights_bm = generate_client_requirements_insights(
            bm_group, level_name="BM", level_value=bm
        )
        if client_req_insights_bm:
            final_insights[zm][bm]['Client_Requirements_Analysis'] = client_req_insights_bm

        for kpi in kpi_list:
            final_insights[zm][bm][kpi] = {
                'Partner Concentration': generate_partner_concentration_insight(bm_group, kpi, level_name="BM", level_value=bm),
                'Leaders & Laggers': generate_leaders_laggers_insight(bm_group, kpi, level_name="BM", level_value=bm),
                'Drastic Change': generate_drastic_change_insight(bm_group, level_name="BM", level_value=bm),
                'Areas to Focus': generate_focus_area_insight(bm_group, level_name="BM", level_value=bm)
            }

        # TOP 1 RH
        top_rh = (
            bm_group.groupby('Relationship Handler')[kpi_list]
            .sum().sum(axis=1)
            .sort_values(ascending=False)
            .head(1).index
        )

        for rh in top_rh:
            rh_group = bm_group[bm_group['Relationship Handler'] == rh]
            final_insights[zm][bm][rh] = {}

            # ADD CLIENT REQUIREMENTS ANALYSIS AT RH LEVEL
            client_req_insights_rh = generate_client_requirements_insights(
                rh_group, level_name="RH", level_value=rh
            )
            if client_req_insights_rh:
                final_insights[zm][bm][rh]['Client_Requirements_Analysis'] = client_req_insights_rh

            for kpi in kpi_list:
                final_insights[zm][bm][rh][kpi] = {
                    'Partner Concentration': generate_partner_concentration_insight(rh_group, kpi, level_name="RH", level_value=rh),
                    'Leaders & Laggers': generate_leaders_laggers_insight(rh_group, kpi, level_name="RH", level_value=rh),
                    'Drastic Change': generate_drastic_change_insight(rh_group, level_name="RH", level_value=rh),
                    'Areas to Focus': generate_focus_area_insight(rh_group, level_name="RH", level_value=rh)
                }

print("Analysis completed successfully!")
print("\n=== CLIENT REQUIREMENTS COVERED ===")
print("✅ PMS - No. of Active Partners & Concentration")
print("✅ MARS - No. of Active Partners & Concentration")  
print("✅ Client Acquired - Concentration")
print("✅ Saturday School - Correlation to Client Acquisition")
print("✅ Saturday School - Concentration of Partners")
print("✅ Saturday School - % Partners Active /Participation")
print("✅ PRU Usage - Participation & Concentration")
print("✅ NJ AMC - AMC Net Sales Insights")
print("✅ NJ AMC - Concentration & Participation")
print("✅ Non-PRU Usage - Participation & Concentration")
print("✅ LAS - Participation & Concentration")
print("✅ Client Acquisition & Equity Net Sales - Checked")
print("\n=== ADDITIONAL ANALYSIS ===")
print("✅ Partner Concentration Analysis")
print("✅ Leaders & Laggers Analysis")
print("✅ Drastic Change Detection")
print("✅ Focus Areas Identification")

  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Analysis completed successfully!

=== CLIENT REQUIREMENTS COVERED ===
✅ PMS - No. of Active Partners & Concentration
✅ MARS - No. of Active Partners & Concentration
✅ Client Acquired - Concentration
✅ Saturday School - Correlation to Client Acquisition
✅ Saturday School - Concentration of Partners
✅ Saturday School - % Partners Active /Participation
✅ PRU Usage - Participation & Concentration
✅ NJ AMC - AMC Net Sales Insights
✅ NJ AMC - Concentration & Participation
✅ Non-PRU Usage - Participation & Concentration
✅ LAS - Participation & Concentration
✅ Client Acquisition & Equity Net Sales - Checked

=== ADDITIONAL ANALYSIS ===
✅ Partner Concentration Analysis
✅ Leaders & Laggers Analysis
✅ Drastic Change Detection
✅ Focus Areas Identification


In [179]:
import json

print(json.dumps(final_insights, indent=4))
def pretty_print_insights(insights, level=0):
    indent = "    " * level
    for key, value in insights.items():
        if isinstance(value, dict):
            print(f"\n{indent}{key}:")
            pretty_print_insights(value, level + 1)
        else:
            print(f"{indent}- {key}: {value}")  

pretty_print_insights(final_insights)


{
    "SARFARAZ ABDULLA PATEL": {
        "Client_Requirements_Analysis": {
            "PMS_Analysis": {
                "Calculated Metrics": {
                    "PMS AUM FY 23-24 YTD": {
                        "Active Partners Analysis": {
                            "Active Partners": 12,
                            "Total Partners": 396,
                            "Participation Rate %": 3.03
                        },
                        "Concentration Analysis": {
                            "HHI Index": 0.202,
                            "Top 10% Share %": 100.0,
                            "Top 20% Share %": 100.0,
                            "Gini Coefficient": -0.9853
                        }
                    },
                    "PMS AUM FY 24-25 Q4 YTD": {
                        "Active Partners Analysis": {
                            "Active Partners": 12,
                            "Total Partners": 396,
                            "Participation Rate %"

In [180]:
pretty_print_insights(final_insights)


SARFARAZ ABDULLA PATEL:

    Client_Requirements_Analysis:

        PMS_Analysis:

            Calculated Metrics:

                PMS AUM FY 23-24 YTD:

                    Active Partners Analysis:
                        - Active Partners: 12
                        - Total Partners: 396
                        - Participation Rate %: 3.03

                    Concentration Analysis:
                        - HHI Index: 0.202
                        - Top 10% Share %: 100.0
                        - Top 20% Share %: 100.0
                        - Gini Coefficient: -0.9853

                PMS AUM FY 24-25 Q4 YTD:

                    Active Partners Analysis:
                        - Active Partners: 12
                        - Total Partners: 396
                        - Participation Rate %: 3.03

                    Concentration Analysis:
                        - HHI Index: 0.1681
                        - Top 10% Share %: 100.0
                        - Top 20% Share %: 

In [177]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
gemini_api_key = "AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0"
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str, max_words: int = 200) -> str:
        try:
            response = self.model.generate_content(prompt)
            words = response.text.strip().split()
            return " ".join(words[:max_words])
        except Exception as e:
            return f"Error generating insight: {e}"

generator = DeepInsightGenerator()

# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement',
    # PMS - Active Partners & Concentration
    'PMS AUM FY 23-24 YTD',
    'PMS AUM FY 24-25 Q4 YTD',
    'Net NJ PMS Sales',
    'Net Non-NJ PMS Sales',
    
    # MARS - Active Partners & Concentration
    'MARS AUM FY 23-24 YTD',
    'MARS AUM FY 24-25 Q4 YTD',
    'MARS Net Sales FY 23-24 YTD',
    'MARS Net Sales FY 24-25 Q4 YTD',
    'Net Sales through MARS',
    
    # Client Acquired - Concentration
    'Clients Acquired FY 23-24 YTD',
    'Clients Acquired FY 24-25 Q4 YTD',
    'Client Acquisition_Achievement',
    'Client Acquisition_% Achievement',
    'Client Acquisition% Achievement',
    
    # Saturday School - Attendance metrics for correlation & concentration
    'Saturday School (YTD) FY 23-24 Q4 YTD',
    'Saturday School (YTD) FY 24-25 Q4 YTD',
    'Investment\nSaturday School (YTD) FY 23-24 Q4 YTD',
    'Investment\nSaturday School (YTD) FY 24-25 Q4 YTD',
    'Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD',
    'Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD',
    
    # PRU Usage - Participation & Concentration
    'Total AUM FY 23-24 Q4 YTD',
    'Total AUM FY 24-25 Q4 YTD',
    'Equity+ Hyb AUM FY 23-24 Q4 YTD',
    'Equity AUM FY 24-25 Q4 YTD',
    
    # NJ AMC - AMC Net Sales Insights & Concentration
    'AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr)',
    'AMC NS Ach\nFY 24-25 Q4 YTD',
    'AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr)',
    'AMC NS Target\nFY 24-25 Q4 YTD',
    
    # Non-PRU Usage - Participation & Concentration
    'Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr)',
    'Non-NJ AUM\nFY 24-25 Q4 YTD',
    
    # LAS - Participation & Concentration
    'LAS_Achievement',
    'LAS_% Achievement',
    'LAS% Achievement',
    
    # Client Acquisition & Equity Net Sales (to be checked)
    'Equity Net Sales\nFY 23-24 YTD',
    'Equity Net Sales\nFY 24-25 Q4 YTD',
    'Equity Net Sales_Achievement',
    'Equity Net Sales_% Achievement',
    'Equity Net Sales% Achievement'
]

thresholds = [25, 50, 75, 80, 90]

top_bms = 1
top_partners = 1
top_n = 1  
threshold_pct_change = 50  

# Initialize kpi_prompts with all KPIs
kpi_prompts = {
    'Equity Sales': "Analyze Equity Sales performance.",
    'SIP Sales Achievement': "Analyze SIP Sales Achievement performance.",
    'Net Sales through MARS': "Analyze Net Sales through MARS performance.",
    'Investment Net Sales Achievement': "Analyze Investment Net Sales Achievement.",
    # PMS Prompts
    'PMS AUM FY 23-24 YTD': "Analyze PMS AUM performance for FY 23-24.",
    'PMS AUM FY 24-25 Q4 YTD': "Analyze PMS AUM performance for FY 24-25.",
    'Net NJ PMS Sales': "Analyze Net NJ PMS Sales performance.",
    'Net Non-NJ PMS Sales': "Analyze Net Non-NJ PMS Sales performance.",
    # MARS Prompts
    'MARS AUM FY 23-24 YTD': "Analyze MARS AUM performance for FY 23-24.",
    'MARS AUM FY 24-25 Q4 YTD': "Analyze MARS AUM performance for FY 24-25.",
    'MARS Net Sales FY 23-24 YTD': "Analyze MARS Net Sales performance for FY 23-24.",
    'MARS Net Sales FY 24-25 Q4 YTD': "Analyze MARS Net Sales performance for FY 24-25.",
    # Client Acquisition Prompts
    'Clients Acquired FY 23-24 YTD': "Analyze Client Acquisition performance for FY 23-24.",
    'Clients Acquired FY 24-25 Q4 YTD': "Analyze Client Acquisition performance for FY 24-25.",
    'Client Acquisition_Achievement': "Analyze Client Acquisition Achievement.",
    'Client Acquisition_% Achievement': "Analyze Client Acquisition % Achievement.",
    'Client Acquisition% Achievement': "Analyze Client Acquisition % Achievement.",
    # Saturday School Prompts
    'Saturday School (YTD) FY 23-24 Q4 YTD': "Analyze Saturday School attendance for FY 23-24.",
    'Saturday School (YTD) FY 24-25 Q4 YTD': "Analyze Saturday School attendance for FY 24-25.",
    'Investment\nSaturday School (YTD) FY 23-24 Q4 YTD': "Analyze Investment Saturday School attendance for FY 23-24.",
    'Investment\nSaturday School (YTD) FY 24-25 Q4 YTD': "Analyze Investment Saturday School attendance for FY 24-25.",
    'Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD': "Analyze Insurance Saturday School attendance for FY 23-24.",
    'Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD': "Analyze Insurance Saturday School attendance for FY 24-25.",
    # PRU Usage Prompts
    'Total AUM FY 23-24 Q4 YTD': "Analyze Total AUM for PRU usage in FY 23-24.",
    'Total AUM FY 24-25 Q4 YTD': "Analyze Total AUM for PRU usage in FY 24-25.",
    'Equity+ Hyb AUM FY 23-24 Q4 YTD': "Analyze Equity+Hyb AUM for PRU usage in FY 23-24.",
    'Equity AUM FY 24-25 Q4 YTD': "Analyze Equity AUM for PRU usage in FY 24-25.",
    # NJ AMC Prompts
    'AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr)': "Analyze NJ AMC Net Sales Achievement for FY 23-24.",
    'AMC NS Ach\nFY 24-25 Q4 YTD': "Analyze NJ AMC Net Sales Achievement for FY 24-25.",
    'AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr)': "Analyze NJ AMC Net Sales Target for FY 23-24.",
    'AMC NS Target\nFY 24-25 Q4 YTD': "Analyze NJ AMC Net Sales Target for FY 24-25.",
    'Net Sales FY 23-24 YTD': "Analyze Net Sales performance for FY 23-24.",
    'Net Sales FY 24-25 Q4 YTD': "Analyze Net Sales performance for FY 24-25.",
    # Non-PRU Usage Prompts
    'Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr)': "Analyze Non-NJ AUM for FY 23-24.",
    'Non-NJ AUM\nFY 24-25 Q4 YTD': "Analyze Non-NJ AUM for FY 24-25.",
    # LAS Prompts
    'LAS_Achievement': "Analyze LAS Achievement.",
    'LAS_% Achievement': "Analyze LAS % Achievement.",
    'LAS% Achievement': "Analyze LAS % Achievement.",
}

# ================= HELPER FUNCTIONS =================
def partner_concentration(df, kpi):
    df_sorted = df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100 if total_kpi != 0 else 0
    total_partners = len(df_sorted)
    conc_summary = {}
    for t in thresholds:
        if not df_sorted.empty:
            num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
            conc_summary[f'Partners for {t}%'] = num_partners
            conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
    return conc_summary, total_partners, df_sorted

def generate_partner_concentration_insight(df, kpi, level_name=None, level_value=None):
    conc_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        conc_summary, total_partners, df_sorted = partner_concentration(bm_df, kpi)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTotal Partners: {total_partners}\n"
        for t in thresholds:
            text += f"- Top {conc_summary.get(f'Partners for {t}%', 0)} partners ({conc_summary.get(f'% of Total Partners for {t}%',0)}%) contribute {t}% of total {kpi}.\n"
        topX = df_sorted.head(top_partners)[['Partner Name_x', kpi, 'Cumulative KPI %']]
        text += "\nTop Partner:\n"
        for _, row in topX.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Generate insights ONLY from the data provided — do not give generic advices.
Write insights in this exact structure:
1. Concentration level 
2. Risk/impact 
3. Action recommendation Limit to 200 words
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        conc_texts[bm] = text + "\nLLM Insight:\n" + insight
    return conc_texts

def generate_leaders_laggers_insight(df, kpi, level_name=None, level_value=None):
    ll_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        perf = bm_df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
        total_perf = perf[kpi].sum() if perf[kpi].sum()!=0 else 1
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi]/total_perf*100,2)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi]/total_perf*100,2)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTop Leader:\n"
        for _, row in leaders.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        text += "\nBottom Lagger:\n"
        for _, row in laggers.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Analyze the provided KPI data ONLY. Do NOT give generic advice.
Write insights for {level_name} in this structure:
1. Dependency on leader – identify top partner and quantify impact.
2. Fix for lagger – identify bottom partner and recommend improvement.
3. Impact on {level_name} – explain how top/bottom performers affect overall performance.
Limit output to 150–200 words. Use data values explicitly.
"""
        insight = generator.generate_insight(prompt)
        ll_texts[bm] = text + "\nLLM Insight:\n" + insight
    return ll_texts

def detect_drastic_changes(df):
    change_records = []
    df_sorted = df.sort_values(['BM', 'Partner Name_x', 'FY_Year'])
    for (bm, partner), group in df_sorted.groupby(['BM','Partner Name_x']):
        group = group.sort_values('FY_Year')
        for i in range(1,len(group)):
            prev, curr = group.iloc[i-1], group.iloc[i]
            for kpi in kpi_list:
                prev_val, curr_val = prev[kpi], curr[kpi]
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val==0: continue
                pct_change = (curr_val-prev_val)/abs(prev_val)*100
                if abs(pct_change)>=threshold_pct_change:
                    change_records.append({
                        'BM': bm, 'Partner Name_x': partner, 'KPI': kpi,
                        'Year From': prev['FY_Year'], 'Year To': curr['FY_Year'],
                        'Previous Value': round(prev_val,2), 'Current Value': round(curr_val,2),
                        '% Change': round(pct_change,2)
                    })
    return pd.DataFrame(change_records)

def generate_drastic_change_insight(df, level_name=None, level_value=None):
    change_df = detect_drastic_changes(df)
    if change_df.empty:
        return {level_value if level_value else "All": "No significant changes detected."}
    
    bm_insights = {}
    for bm, group in change_df.groupby('BM'):
        combined = f"{level_name}: {level_value if level_value else bm}\n" if level_name else f"BM: {bm}\n"
        top_partner = (
            group.groupby('Partner Name_x')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        for partner in top_partner:
            partner_data = group[group['Partner Name_x'] == partner]
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{partner_data}
Analyze KPI changes using ONLY the provided data.
Write insights for {level_name} in this structure:
1. Largest KPI Change – identify the KPI, partner, and magnitude of change.
2. Reason & Risk – explain why this change happened based on data patterns (e.g., low base, spike/drop) and its business risk.
3. Action – give MF-sales-specific steps for {level_name} (partner engagement, mid-tier activation, continuity plans). Avoid generic advice.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            combined += f"\nPartner: {partner}\n{insight}\n" + ("-"*80) + "\n"
        bm_insights[bm] = combined
    return bm_insights

def generate_focus_area_insight(df, level_name=None, level_value=None):
    focus_records = []
    for (bm, partner), group in df.groupby(['BM','Partner Name_x']):
        partner_row = group.iloc[-1]
        gaps = {}
        for kpi in kpi_list:
            target_cols = [col for col in df.columns if 'Target' in col and kpi.split()[0] in col]
            target_val = partner_row[target_cols[0]] if target_cols else np.nan
            gap_pct = ((target_val - partner_row[kpi])/target_val*100 if target_val not in [0, None, np.nan] else 0)
            gaps[kpi] = round(gap_pct,2)
        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x:x[1], reverse=True)
        focus_records.append({'BM':bm,'Partner Name_x':partner,'Focus KPIs':sorted_gaps,'Max Gap':max_gap})
    
    focus_df = pd.DataFrame(focus_records)
    top_bm = focus_df.groupby('BM')['Max Gap'].max().sort_values(ascending=False).head(top_bms).index
    filtered = focus_df[focus_df['BM'].isin(top_bm)]
    top_data = (
        filtered.groupby('BM',group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    
    bm_insights = {}
    for bm, group in top_data.groupby('BM'):
        bm_insights[bm] = {}
        for _, row in group.iterrows():
            partner = row['Partner Name_x']
            focus_kpis = row['Focus KPIs']
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{focus_kpis}
Analyze KPIs and gaps using ONLY the provided data.
Write insights for {level_name} in this structure:
1. KPIs to fix – identify top underperforming KPIs per partner.
2. Improvement required – quantify the gap and suggest realistic targets.
3. Action plan – provide specific steps for {level_name} to improve performance.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            bm_insights[bm][partner] = insight
    return bm_insights

# ========== FILTER EXISTING KPIS ==========
# Filter to only include KPIs that exist in final_df
existing_kpis = [kpi for kpi in kpi_list if kpi in final_df.columns]
print(f"Found {len(existing_kpis)} KPIs out of {len(kpi_list)} in the dataframe")
kpi_list = existing_kpis

# ========== MAIN EXECUTION ==========
final_insights = {}

top_zms = final_df.groupby('ZM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index

for zm in top_zms:
    zm_group = final_df[final_df['ZM'] == zm]
    final_insights[zm] = {}

    # TOP 1 BM under this ZM
    top_bm = zm_group.groupby('BM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index
    bm_df = zm_group[zm_group['BM'].isin(top_bm)]

    final_insights[zm]['ZM_Overview'] = {}
    for kpi in kpi_list:
        final_insights[zm]['ZM_Overview'][kpi] = {
            'Partner Concentration': generate_partner_concentration_insight(bm_df, kpi, level_name="ZM", level_value=zm),
            'Leaders & Laggers': generate_leaders_laggers_insight(bm_df, kpi, level_name="ZM", level_value=zm),
            'Drastic Change': generate_drastic_change_insight(bm_df, level_name="ZM", level_value=zm),
            'Areas to Focus': generate_focus_area_insight(bm_df, level_name="ZM", level_value=zm)
        }

    # BM LEVEL
    for bm in top_bm:
        bm_group = zm_group[zm_group['BM'] == bm]
        final_insights[zm][bm] = {}

        for kpi in kpi_list:
            final_insights[zm][bm][kpi] = {
                'Partner Concentration': generate_partner_concentration_insight(bm_group, kpi, level_name="BM", level_value=bm),
                'Leaders & Laggers': generate_leaders_laggers_insight(bm_group, kpi, level_name="BM", level_value=bm),
                'Drastic Change': generate_drastic_change_insight(bm_group, level_name="BM", level_value=bm),
                'Areas to Focus': generate_focus_area_insight(bm_group, level_name="BM", level_value=bm)
            }

        # TOP 1 RH
        top_rh = (
            bm_group.groupby('Relationship Handler')[kpi_list]
            .sum().sum(axis=1)
            .sort_values(ascending=False)
            .head(1).index
        )

        for rh in top_rh:
            rh_group = bm_group[bm_group['Relationship Handler'] == rh]
            final_insights[zm][bm][rh] = {}
            for kpi in kpi_list:
                final_insights[zm][bm][rh][kpi] = {
                    'Partner Concentration': generate_partner_concentration_insight(rh_group, kpi, level_name="RH", level_value=rh),
                    'Leaders & Laggers': generate_leaders_laggers_insight(rh_group, kpi, level_name="RH", level_value=rh),
                    'Drastic Change': generate_drastic_change_insight(rh_group, level_name="RH", level_value=rh),
                    'Areas to Focus': generate_focus_area_insight(rh_group, level_name="RH", level_value=rh)
                }

print("Analysis completed successfully!")

Found 81 KPIs out of 84 in the dataframe


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x00000181AB81BE10>>
Traceback (most recent call last):
  File "C:\KD\py3env\Lib\site-packages\ipykernel\ipkernel.py", line 775, in _clean_thread_parent_frames
KeyboardInterrupt: 

KeyboardInterrupt



In [171]:
MIS_total.columns

Index(['ZM', 'SRM', 'RM', 'BM', 'Broker Code', 'Partner Name', 'Category',
       'Doer Name', 'Doer Type', 'Center', 'Age in NJ', 'Age in NJ Target',
       'Total AUM FY 23-24 Q4 YTD', 'Equity+ Hyb AUM FY 23-24 Q4 YTD',
       'LIVE SIP FY 23-24 Q4 YTD', 'Total Net Sales\nFY 23-24 YTD',
       'Equity Net Sales\nFY 23-24 YTD', 'Net SIP\nFY 23-24 YTD',
       'MARS AUM FY 23-24 YTD', 'MARS Net Sales FY 23-24 YTD',
       'PMS AUM FY 23-24 YTD', 'Net Sales FY 23-24 YTD',
       'Clients Acquired FY 23-24 YTD', 'Live Accounts FY 23-24 YTD',
       'Saturday School (YTD) FY 23-24 Q4 YTD',
       'Investment\nSaturday School (YTD) FY 23-24 Q4 YTD',
       'Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD',
       'Total Group FY 23-24 Q4 YTD', 'Group Covered FY 23-24 Q4 YTD',
       '% Covered', 'Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr)',
       'Total Reviews\nFY 23-24 Q4 YTD', 'Amount FY 23-24 Q4 YTD\n(in Cr)',
       'Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr)',
       'Flexicap Ach\nFY 23

In [176]:
bn_df.columns

Index(['Sr No.', 'Partner Code', 'Partner Name', 'Center', 'Category',
       'Relationship Handler', 'Investment Net Sales Target',
       'Investment Net Sales Achievement',
       'Investment Net Sales % Achievement', 'Equity Sales', 'MIP Sales',
       'Gold Sales', 'Sales in Physical Assets ', 'Sales in Direct Equity ',
       'FD + Bond (Primary Market) Sales', 'Secondary Market Bond Sales',
       'Net Sales Through Realty', 'Net NJ PMS Sales', 'Net Non-NJ PMS Sales',
       'Net Sales through MARS', 'SIP Sales Target', 'SIP Sales Achievement',
       'SIP Sales % Achievement', 'Fresh Gross SIP Sales',
       'SIP Closure / Termination', 'FY_Year'],
      dtype='object', name=0)

###token

In [54]:
import pandas as pd
import numpy as np
import google.generativeai as genai
import tiktoken

# ================= GEMINI SETUP =================
gemini_api_key = "AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0"
genai.configure(api_key=gemini_api_key)

# Add this inside your DeepInsightGenerator class
class DeepInsightGenerator:
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
        import tiktoken
        self.enc = tiktoken.encoding_for_model("gpt-4")  # approximate for Gemini Flash
        self.total_tokens_used = 0  # track total tokens
    
    def generate_insight(self, prompt: str, max_words: int = 200) -> str:
        try:
            # Count prompt tokens
            prompt_tokens = len(self.enc.encode(prompt))
            
            response = self.model.generate_content(prompt)
            words = response.text.strip().split()
            generated_text = " ".join(words[:max_words])
            
            # Approximate output tokens (1 token ~ 0.75 words)
            output_tokens = int(len(generated_text.split()) / 0.75)
            total_tokens = prompt_tokens + output_tokens
            
            # Track cumulative tokens
            self.total_tokens_used += total_tokens
            print(f"Prompt tokens: {prompt_tokens}, Output tokens: {output_tokens}, Total tokens: {total_tokens}, Cumulative tokens: {self.total_tokens_used}")
            
            return generated_text
        except Exception as e:
            return f"Error generating insight: {e}"


generator = DeepInsightGenerator()

# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
thresholds = [25, 50, 75, 80, 90]

top_bms = 1
top_partners = 1
top_n = 1  
threshold_pct_change = 50  

kpi_prompts = {
    'Equity Sales': "Analyze Equity Sales performance.",
    'SIP Sales Achievement': "Analyze SIP Sales Achievement performance.",
    'Net Sales through MARS': "Analyze Net Sales through MARS performance.",
    'Investment Net Sales Achievement': "Analyze Investment Net Sales Achievement."
}

# ================= HELPER FUNCTIONS =================
def partner_concentration(df, kpi):
    df_sorted = df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100 if total_kpi != 0 else 0
    total_partners = len(df_sorted)
    conc_summary = {}
    for t in thresholds:
        if not df_sorted.empty:
            num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
            conc_summary[f'Partners for {t}%'] = num_partners
            conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
    return conc_summary, total_partners, df_sorted

def generate_partner_concentration_insight(df, kpi, level_name=None, level_value=None):
    conc_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        conc_summary, total_partners, df_sorted = partner_concentration(bm_df, kpi)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTotal Partners: {total_partners}\n"
        for t in thresholds:
            text += f"- Top {conc_summary.get(f'Partners for {t}%', 0)} partners ({conc_summary.get(f'% of Total Partners for {t}%',0)}%) contribute {t}% of total {kpi}.\n"
        topX = df_sorted.head(top_partners)[['Partner Name_x', kpi, 'Cumulative KPI %']]
        text += "\nTop Partner:\n"
        for _, row in topX.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Generate insights ONLY from the data provided — do not give generic advices.
Write insights in this exact structure:
1. Concentration level 
2. Risk/impact 
3. Action recommendation Limit to 200 words
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        conc_texts[bm] = text + "\nLLM Insight:\n" + insight
    return conc_texts

def generate_leaders_laggers_insight(df, kpi, level_name=None, level_value=None):
    ll_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        perf = bm_df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
        total_perf = perf[kpi].sum() if perf[kpi].sum()!=0 else 1
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi]/total_perf*100,2)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi]/total_perf*100,2)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTop Leader:\n"
        for _, row in leaders.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        text += "\nBottom Lagger:\n"
        for _, row in laggers.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Analyze the provided KPI data ONLY. Do NOT give generic advice.
Write insights for {level_name} in this structure:
1. Dependency on leader – identify top partner and quantify impact.
2. Fix for lagger – identify bottom partner and recommend improvement.
3. Impact on {level_name} – explain how top/bottom performers affect overall performance.
Limit output to 150–200 words. Use data values explicitly.
"""
        insight = generator.generate_insight(prompt)
        ll_texts[bm] = text + "\nLLM Insight:\n" + insight
    return ll_texts

def detect_drastic_changes(df):
    change_records = []
    df_sorted = df.sort_values(['BM', 'Partner Name_x', 'FY_Year'])
    for (bm, partner), group in df_sorted.groupby(['BM','Partner Name_x']):
        group = group.sort_values('FY_Year')
        for i in range(1,len(group)):
            prev, curr = group.iloc[i-1], group.iloc[i]
            for kpi in kpi_list:
                prev_val, curr_val = prev[kpi], curr[kpi]
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val==0: continue
                pct_change = (curr_val-prev_val)/abs(prev_val)*100
                if abs(pct_change)>=threshold_pct_change:
                    change_records.append({
                        'BM': bm, 'Partner Name_x': partner, 'KPI': kpi,
                        'Year From': prev['FY_Year'], 'Year To': curr['FY_Year'],
                        'Previous Value': round(prev_val,2), 'Current Value': round(curr_val,2),
                        '% Change': round(pct_change,2)
                    })
    return pd.DataFrame(change_records)

def generate_drastic_change_insight(df, level_name=None, level_value=None):
    change_df = detect_drastic_changes(df)
    if change_df.empty:
        return {level_value if level_value else "All": "No significant changes detected."}
    
    bm_insights = {}
    for bm, group in change_df.groupby('BM'):
        combined = f"{level_name}: {level_value if level_value else bm}\n" if level_name else f"BM: {bm}\n"
        top_partner = (
            group.groupby('Partner Name_x')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        for partner in top_partner:
            partner_data = group[group['Partner Name_x'] == partner]
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{partner_data}
Analyze KPI changes using ONLY the provided data.
Write insights for {level_name} in this structure:
1. Largest KPI Change – identify the KPI, partner, and magnitude of change.
2. Reason & Risk – explain why this change happened based on data patterns (e.g., low base, spike/drop) and its business risk.
3. Action – give MF-sales-specific steps for {level_name} (partner engagement, mid-tier activation, continuity plans). Avoid generic advice.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            combined += f"\nPartner: {partner}\n{insight}\n" + ("-"*80) + "\n"
        bm_insights[bm] = combined
    return bm_insights

def generate_focus_area_insight(df, level_name=None, level_value=None):
    focus_records = []
    for (bm, partner), group in df.groupby(['BM','Partner Name_x']):
        partner_row = group.iloc[-1]
        gaps = {}
        for kpi in kpi_list:
            target_cols = [col for col in df.columns if 'Target' in col and kpi.split()[0] in col]
            target_val = partner_row[target_cols[0]] if target_cols else np.nan
            gap_pct = ((target_val - partner_row[kpi])/target_val*100 if target_val not in [0, None, np.nan] else 0)
            gaps[kpi] = round(gap_pct,2)
        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x:x[1], reverse=True)
        focus_records.append({'BM':bm,'Partner Name_x':partner,'Focus KPIs':sorted_gaps,'Max Gap':max_gap})
    
    focus_df = pd.DataFrame(focus_records)
    top_bm = focus_df.groupby('BM')['Max Gap'].max().sort_values(ascending=False).head(top_bms).index
    filtered = focus_df[focus_df['BM'].isin(top_bm)]
    top_data = (
        filtered.groupby('BM',group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    
    bm_insights = {}
    for bm, group in top_data.groupby('BM'):
        bm_insights[bm] = {}
        for _, row in group.iterrows():
            partner = row['Partner Name_x']
            focus_kpis = row['Focus KPIs']
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{focus_kpis}
Analyze KPIs and gaps using ONLY the provided data.
Write insights for {level_name} in this structure:
1. KPIs to fix – identify top underperforming KPIs per partner.
2. Improvement required – quantify the gap and suggest realistic targets.
3. Action plan – provide specific steps for {level_name} to improve performance.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            bm_insights[bm][partner] = insight
    return bm_insights

# ========== MAIN EXECUTION ==========
final_insights = {}

top_zms = final_df.groupby('ZM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index

for zm in top_zms:
    zm_group = final_df[final_df['ZM'] == zm]
    final_insights[zm] = {}

    # TOP 1 BM under this ZM
    top_bm = zm_group.groupby('BM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index
    bm_df = zm_group[zm_group['BM'].isin(top_bm)]

    final_insights[zm]['ZM_Overview'] = {}
    for kpi in kpi_list:
        final_insights[zm]['ZM_Overview'][kpi] = {
            'Partner Concentration': generate_partner_concentration_insight(bm_df, kpi, level_name="ZM", level_value=zm),
            'Leaders & Laggers': generate_leaders_laggers_insight(bm_df, kpi, level_name="ZM", level_value=zm),
            'Drastic Change': generate_drastic_change_insight(bm_df, level_name="ZM", level_value=zm),
            'Areas to Focus': generate_focus_area_insight(bm_df, level_name="ZM", level_value=zm)
        }

    # BM LEVEL
    for bm in top_bm:
        bm_group = zm_group[zm_group['BM'] == bm]
        final_insights[zm][bm] = {}

        for kpi in kpi_list:
            final_insights[zm][bm][kpi] = {
                'Partner Concentration': generate_partner_concentration_insight(bm_group, kpi, level_name="BM", level_value=bm),
                'Leaders & Laggers': generate_leaders_laggers_insight(bm_group, kpi, level_name="BM", level_value=bm),
                'Drastic Change': generate_drastic_change_insight(bm_group, level_name="BM", level_value=bm),
                'Areas to Focus': generate_focus_area_insight(bm_group, level_name="BM", level_value=bm)
            }

        # TOP 1 RH
        top_rh = (
            bm_group.groupby('Relationship Handler')[kpi_list]
            .sum().sum(axis=1)
            .sort_values(ascending=False)
            .head(1).index
        )

        for rh in top_rh:
            rh_group = bm_group[bm_group['Relationship Handler'] == rh]
            final_insights[zm][bm][rh] = {}
            for kpi in kpi_list:
                final_insights[zm][bm][rh][kpi] = {
                    'Partner Concentration': generate_partner_concentration_insight(rh_group, kpi, level_name="RH", level_value=rh),
                    'Leaders & Laggers': generate_leaders_laggers_insight(rh_group, kpi, level_name="RH", level_value=rh),
                    'Drastic Change': generate_drastic_change_insight(rh_group, level_name="RH", level_value=rh),
                    'Areas to Focus': generate_focus_area_insight(rh_group, level_name="RH", level_value=rh)
                }

Prompt tokens: 208, Output tokens: 156, Total tokens: 364, Cumulative tokens: 364
Prompt tokens: 175, Output tokens: 198, Total tokens: 373, Cumulative tokens: 737
Prompt tokens: 527, Output tokens: 256, Total tokens: 783, Cumulative tokens: 1520


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 160, Output tokens: 205, Total tokens: 365, Cumulative tokens: 1885
Prompt tokens: 213, Output tokens: 194, Total tokens: 407, Cumulative tokens: 2292
Prompt tokens: 177, Output tokens: 213, Total tokens: 390, Cumulative tokens: 2682
Prompt tokens: 527, Output tokens: 266, Total tokens: 793, Cumulative tokens: 3475


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 160, Output tokens: 198, Total tokens: 358, Cumulative tokens: 3833
Prompt tokens: 227, Output tokens: 194, Total tokens: 421, Cumulative tokens: 4254
Prompt tokens: 177, Output tokens: 213, Total tokens: 390, Cumulative tokens: 4644
Prompt tokens: 527, Output tokens: 246, Total tokens: 773, Cumulative tokens: 5417


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 160, Output tokens: 214, Total tokens: 374, Cumulative tokens: 5791
Prompt tokens: 221, Output tokens: 186, Total tokens: 407, Cumulative tokens: 6198
Prompt tokens: 179, Output tokens: 213, Total tokens: 392, Cumulative tokens: 6590
Prompt tokens: 527, Output tokens: 245, Total tokens: 772, Cumulative tokens: 7362


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 160, Output tokens: 204, Total tokens: 364, Cumulative tokens: 7726
Prompt tokens: 206, Output tokens: 172, Total tokens: 378, Cumulative tokens: 8104
Prompt tokens: 171, Output tokens: 206, Total tokens: 377, Cumulative tokens: 8481
Prompt tokens: 523, Output tokens: 217, Total tokens: 740, Cumulative tokens: 9221


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 156, Output tokens: 256, Total tokens: 412, Cumulative tokens: 9633
Prompt tokens: 211, Output tokens: 174, Total tokens: 385, Cumulative tokens: 10018
Prompt tokens: 173, Output tokens: 214, Total tokens: 387, Cumulative tokens: 10405
Prompt tokens: 523, Output tokens: 212, Total tokens: 735, Cumulative tokens: 11140


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 156, Output tokens: 217, Total tokens: 373, Cumulative tokens: 11513
Prompt tokens: 225, Output tokens: 172, Total tokens: 397, Cumulative tokens: 11910
Prompt tokens: 173, Output tokens: 193, Total tokens: 366, Cumulative tokens: 12276
Prompt tokens: 523, Output tokens: 228, Total tokens: 751, Cumulative tokens: 13027


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 156, Output tokens: 237, Total tokens: 393, Cumulative tokens: 13420
Prompt tokens: 219, Output tokens: 178, Total tokens: 397, Cumulative tokens: 13817
Prompt tokens: 175, Output tokens: 236, Total tokens: 411, Cumulative tokens: 14228
Prompt tokens: 523, Output tokens: 236, Total tokens: 759, Cumulative tokens: 14987


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 156, Output tokens: 193, Total tokens: 349, Cumulative tokens: 15336
Prompt tokens: 203, Output tokens: 152, Total tokens: 355, Cumulative tokens: 15691
Prompt tokens: 169, Output tokens: 177, Total tokens: 346, Cumulative tokens: 16037
Prompt tokens: 342, Output tokens: 224, Total tokens: 566, Cumulative tokens: 16603


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 154, Output tokens: 145, Total tokens: 299, Cumulative tokens: 16902
Prompt tokens: 210, Output tokens: 198, Total tokens: 408, Cumulative tokens: 17310
Prompt tokens: 164, Output tokens: 221, Total tokens: 385, Cumulative tokens: 17695
Prompt tokens: 342, Output tokens: 229, Total tokens: 571, Cumulative tokens: 18266


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 154, Output tokens: 170, Total tokens: 324, Cumulative tokens: 18590
Prompt tokens: 224, Output tokens: 162, Total tokens: 386, Cumulative tokens: 18976
Prompt tokens: 171, Output tokens: 245, Total tokens: 416, Cumulative tokens: 19392
Prompt tokens: 342, Output tokens: 232, Total tokens: 574, Cumulative tokens: 19966


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 154, Output tokens: 212, Total tokens: 366, Cumulative tokens: 20332
Prompt tokens: 216, Output tokens: 192, Total tokens: 408, Cumulative tokens: 20740
Prompt tokens: 172, Output tokens: 226, Total tokens: 398, Cumulative tokens: 21138
Prompt tokens: 342, Output tokens: 254, Total tokens: 596, Cumulative tokens: 21734


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))


Prompt tokens: 154, Output tokens: 158, Total tokens: 312, Cumulative tokens: 22046


In [None]:
160, Output tokens: 205, Total tokens: 365, Cumulative tokens: 1885
Prompt tokens: 213, Output tokens: 194, Total tokens: 407, Cumulative tokens: 2292
Prompt tokens: 177, Output tokens: 213, Total tokens: 390, Cumulative tokens: 2682
Prompt tokens: 527, Output tokens: 266, Total tokens: 793, Cumulative tokens: 3475

In [122]:
22046 * 0.5

11023.0

In [121]:
365 + 194+213+266

1038

In [52]:
!pip install tiktoken





[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
RM - 1 - KPI - 4 PARTNERS - ALL
ZM - 1
BM - 1


In [123]:
185 * 11023

2039255

In [124]:
185 * 11023

2039255

In [126]:
2039255/1000000 * 0.10 

0.20392549999999998

In [127]:
2039255/1000000 * 0.4

0.8157019999999999

In [129]:
(0.2 + 0.8) * 80

80.0

In [130]:
bn_df.columns

Index(['Sr No.', 'Partner Code', 'Partner Name', 'Center', 'Category',
       'Relationship Handler', 'Investment Net Sales Target',
       'Investment Net Sales Achievement',
       'Investment Net Sales % Achievement', 'Equity Sales', 'MIP Sales',
       'Gold Sales', 'Sales in Physical Assets ', 'Sales in Direct Equity ',
       'FD + Bond (Primary Market) Sales', 'Secondary Market Bond Sales',
       'Net Sales Through Realty', 'Net NJ PMS Sales', 'Net Non-NJ PMS Sales',
       'Net Sales through MARS', 'SIP Sales Target', 'SIP Sales Achievement',
       'SIP Sales % Achievement', 'Fresh Gross SIP Sales',
       'SIP Closure / Termination', 'FY_Year'],
      dtype='object', name=0)

In [132]:
MIS_total.columns

Index(['ZM', 'SRM', 'RM', 'BM', 'Broker Code', 'Partner Name', 'Category',
       'Doer Name', 'Doer Type', 'Center', 'Age in NJ', 'Age in NJ Target',
       'Total AUM FY 23-24 Q4 YTD', 'Equity+ Hyb AUM FY 23-24 Q4 YTD',
       'LIVE SIP FY 23-24 Q4 YTD', 'Total Net Sales\nFY 23-24 YTD',
       'Equity Net Sales\nFY 23-24 YTD', 'Net SIP\nFY 23-24 YTD',
       'MARS AUM FY 23-24 YTD', 'MARS Net Sales FY 23-24 YTD',
       'PMS AUM FY 23-24 YTD', 'Net Sales FY 23-24 YTD',
       'Clients Acquired FY 23-24 YTD', 'Live Accounts FY 23-24 YTD',
       'Saturday School (YTD) FY 23-24 Q4 YTD',
       'Investment\nSaturday School (YTD) FY 23-24 Q4 YTD',
       'Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD',
       'Total Group FY 23-24 Q4 YTD', 'Group Covered FY 23-24 Q4 YTD',
       '% Covered', 'Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr)',
       'Total Reviews\nFY 23-24 Q4 YTD', 'Amount FY 23-24 Q4 YTD\n(in Cr)',
       'Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr)',
       'Flexicap Ach\nFY 23

In [143]:
fy23_SC = pd.read_excel("FY23 - YTD - Partner Score Card - Scores.xls")
fy24_SC = pd.read_excel("FY24 - YTD - Partner Score Card - Scores.xls")
fy25_SC = pd.read_excel("FY25 - YTD - Partner Score Card - Scores.xls")

In [160]:
fy25_SC = pd.read_excel("FY25 - YTD - Partner Score Card - Scores.xls")

In [161]:
# Make first row the header
fy25_SC.columns = fy25_SC.iloc[0]   
fy25_SC = fy25_SC[1:]              
fy25_SC = fy25_SC.reset_index(drop=True) 

In [162]:
import pandas as pd
cols = fy25_SC.columns.to_list()
new_cols = []
last_val = None
for c in cols:
    if pd.notna(c):
        last_val = c
        new_cols.append(c)
    else:
        new_cols.append(last_val)
fy25_SC.columns = new_cols

sub_cols = fy25_SC.iloc[0].fillna('')
fy25_SC = fy25_SC[1:]  # Remove the 0th row used for sub-headers

fy25_SC.columns = [f"{top}_{sub}" if sub != '' else top for top, sub in zip(fy25_SC.columns, sub_cols)]

fy25_SC.reset_index(drop=True, inplace=True)

print(fy25_SC.columns)


Index(['Sr No', 'Broker Code', 'Partner Name', 'Doer Name', 'Doer Type',
       'Center', 'Equity Net Sales_Target', 'Equity Net Sales_Achievement',
       'Equity Net Sales_% Achievement', 'Insurance_Target',
       'Insurance_Achievement', 'Insurance_% Achievement', 'SIP Sales_Target',
       'SIP Sales_Achievement', 'SIP Sales_% Achievement',
       'Client Acquisition_Target', 'Client Acquisition_Achievement',
       'Client Acquisition_% Achievement', 'LAS_Target', 'LAS_Achievement',
       'LAS_% Achievement', 'SIP to Net Sales Ratio_SIP Input Value',
       'SIP to Net Sales Ratio_Ratio', 'Total % Achievement'],
      dtype='object')


In [163]:
fy25_SC.head()

Unnamed: 0,Sr No,Broker Code,Partner Name,Doer Name,Doer Type,Center,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement
0,1,42342,ABHISHEK KUMAR,GUNJAN KUMAR ROY,UNIT MANAGER,AGRA,2721600,6748802.27,247.97,28000.0,92237.5,329.42,68040,453382.33,666.35,25.5,68,266.67,1331.31,464000,200,309484.81,68.23,395.32
1,2,42032,AJAJ AHMED,GUNJAN KUMAR ROY,UNIT MANAGER,AGRA,3175200,53890.98,1.7,16800.0,13777.96,82.01,79380,7124.71,8.98,29.75,18,60.5,177.56,0,0,11499.49,2.54,23.2
2,3,41167,AMAN GUPTA,GUNJAN KUMAR ROY,UNIT MANAGER,AGRA,3175200,964994.72,30.39,33600.0,0.0,0.0,79380,98245.11,123.77,29.75,2,6.72,219.92,0,0,65496.74,14.44,46.32
3,4,26185,AMIT GANGWAR,SAURABH AGRAWAL,UNIT MANAGER,AGRA,8709120,1649413.22,18.94,134400.0,0.0,0.0,190512,26098.76,13.7,51.0,14,27.45,3102.3,0,0,84495.9,11.64,12.02
4,5,24259,ANKUR SAXENA,SAURABH AGRAWAL,UNIT MANAGER,AGRA,8709120,2641076.48,30.33,180173.6,42874.0,23.8,190512,-125543.97,-65.9,51.0,14,27.45,5441.98,44000,200,166092.04,22.89,16.36


In [164]:
SC_totals = pd.concat([fy23_SC,fy24_SC,fy25_SC], axis = 0)

In [166]:
SC_totals.columns

Index(['Sr No', 'Broker Code', 'Partner Name', 'Doer Name', 'Doer Type',
       'Center', 'Equity Net Sales_Target', 'Equity Net Sales_Achievement',
       'Equity Net Sales_% Achievement', 'Insurance_Target',
       'Insurance_Achievement', 'Insurance_% Achievement', 'SIP Sales_Target',
       'SIP Sales_Achievement', 'SIP Sales_% Achievement',
       'Client Acquisition_Target', 'Client Acquisition_Achievement',
       'Client Acquisition_% Achievement', 'LAS_Target', 'LAS_Achievement',
       'LAS_% Achievement', 'SIP to Net Sales Ratio_SIP Input Value',
       'SIP to Net Sales Ratio_Ratio', 'Total % Achievement'],
      dtype='object')

In [47]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
gemini_api_key = "AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0"
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str, max_words: int = 200) -> str:
        try:
            response = self.model.generate_content(prompt)
            words = response.text.strip().split()
            return " ".join(words[:max_words])
        except Exception as e:
            return f"Error generating insight: {e}"

generator = DeepInsightGenerator()

# ================= CONFIG =================
# Original KPIs
original_kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]

# New KPIs based on your requirements
new_kpi_list = [
    # PMS KPIs
    'PMS AUM FY 24-25 Q4 YTD',          # For PMS Concentration
    'PMS_Active_Partners',              # We'll create this column
    
    # MARS KPIs
    'MARS AUM FY 24-25 Q4 YTD',         # For MARS Concentration
    'MARS_Active_Partners',             # We'll create this column
    
    # Client Acquisition
    'Clients Acquired FY 24-25 Q4 YTD',
    
    # Saturday School KPIs
    'Saturday School (YTD) FY 24-25 Q4 YTD',  # For concentration and correlation
    
    # NJ AMC KPIs
    'AMC NS Ach\nFY 24-25 Q4 YTD',
    
    # LAS KPIs
    'LAS_Achievement',
    
    # Additional for correlation analysis
    'Equity Net Sales\nFY 24-25 Q4 YTD'
]

# Combined KPI list
kpi_list = original_kpi_list + new_kpi_list

thresholds = [25, 50, 75, 80, 90]
top_bms = 1
top_partners = 1
top_n = 1  
threshold_pct_change = 50  

# ================= UPDATE KPI PROMPTS =================
kpi_prompts = {
    # Original KPIs
    'Equity Sales': "Analyze Equity Sales performance.",
    'SIP Sales Achievement': "Analyze SIP Sales Achievement performance.",
    'Net Sales through MARS': "Analyze Net Sales through MARS performance.",
    'Investment Net Sales Achievement': "Analyze Investment Net Sales Achievement.",
    
    # New KPIs
    'PMS AUM FY 24-25 Q4 YTD': "Analyze PMS AUM concentration and partner activity.",
    'PMS_Active_Partners': "Analyze number of active PMS partners and participation rate.",
    'MARS AUM FY 24-25 Q4 YTD': "Analyze MARS AUM concentration and partner activity.",
    'MARS_Active_Partners': "Analyze number of active MARS partners and participation rate.",
    'Clients Acquired FY 24-25 Q4 YTD': "Analyze client acquisition concentration among partners.",
    'Saturday School (YTD) FY 24-25 Q4 YTD': "Analyze Saturday School participation and its impact.",
    'AMC NS Ach\nFY 24-25 Q4 YTD': "Analyze NJ AMC Net Sales performance and partner participation.",
    'LAS_Achievement': "Analyze LAS (Loan Against Securities) achievement and partner participation.",
    'Equity Net Sales\nFY 24-25 Q4 YTD': "Analyze Equity Net Sales performance for correlation analysis."
}

# ================= PREPARE DATA FOR NEW KPIs =================
def prepare_active_partners_columns(df):
    """Create active partner columns for PMS and MARS"""
    df = df.copy()
    
    # For PMS: Create binary column indicating active PMS partners (AUM > 0)
    if 'PMS AUM FY 24-25 Q4 YTD' in df.columns:
        df['PMS_Active_Partners'] = df['PMS AUM FY 24-25 Q4 YTD'].apply(lambda x: 1 if x > 0 else 0)
    
    # For MARS: Create binary column indicating active MARS partners (AUM > 0)
    if 'MARS AUM FY 24-25 Q4 YTD' in df.columns:
        df['MARS_Active_Partners'] = df['MARS AUM FY 24-25 Q4 YTD'].apply(lambda x: 1 if x > 0 else 0)
    
    return df

# Prepare the data
final_df = prepare_active_partners_columns(final_df)

# ================= HELPER FUNCTIONS (YOUR EXACT FUNCTIONS - UPDATED) =================
def partner_concentration(df, kpi):
    df_sorted = df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100 if total_kpi != 0 else 0
    total_partners = len(df_sorted)
    conc_summary = {}
    for t in thresholds:
        if not df_sorted.empty:
            num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
            conc_summary[f'Partners for {t}%'] = num_partners
            conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
    return conc_summary, total_partners, df_sorted

def generate_partner_concentration_insight(df, kpi, level_name=None, level_value=None):
    conc_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        conc_summary, total_partners, df_sorted = partner_concentration(bm_df, kpi)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTotal Partners: {total_partners}\n"
        for t in thresholds:
            text += f"- Top {conc_summary.get(f'Partners for {t}%', 0)} partners ({conc_summary.get(f'% of Total Partners for {t}%',0)}%) contribute {t}% of total {kpi}.\n"
        topX = df_sorted.head(top_partners)[['Partner Name_x', kpi, 'Cumulative KPI %']]
        text += "\nTop Partner:\n"
        for _, row in topX.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Generate insights ONLY from the data provided — No generic lines or introduction, No assumptions beyond the data.
Write insights in this exact structure:
1. Concentration level 
2. Risk/impact 
3. Action recommendation
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        conc_texts[bm] = text + "\nLLM Insight:\n" + insight
    return conc_texts

def generate_leaders_laggers_insight(df, kpi, level_name=None, level_value=None):
    ll_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        perf = bm_df.groupby('Partner Name_x')[kpi].sum().sort_values(ascending=False).reset_index()
        total_perf = perf[kpi].sum() if perf[kpi].sum()!=0 else 1
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi]/total_perf*100,2)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi]/total_perf*100,2)
        text = f"{level_name}: {level_value if level_value else bm}\nKPI: {kpi}\nTop Leader:\n"
        for _, row in leaders.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        text += "\nBottom Lagger:\n"
        for _, row in laggers.iterrows():
            text += f"- {row['Partner Name_x']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Analyze the provided KPI data ONLY. Do NOT give generic advice or introduction, No assumptions beyond the data.
Write insights for {level_name} in this structure:
1. Dependency on leader – identify top partner and quantify impact.
2. Fix for lagger – identify bottom partner and recommend improvement.
3. Impact on {level_name} – explain how top/bottom performers affect overall performance.
Limit output to 150–200 words. Use data values explicitly.
"""
        insight = generator.generate_insight(prompt)
        ll_texts[bm] = text + "\nLLM Insight:\n" + insight
    return ll_texts

def detect_drastic_changes(df):
    change_records = []
    df_sorted = df.sort_values(['BM', 'Partner Name_x', 'FY_Year'])
    for (bm, partner), group in df_sorted.groupby(['BM','Partner Name_x']):
        group = group.sort_values('FY_Year')
        for i in range(1,len(group)):
            prev, curr = group.iloc[i-1], group.iloc[i]
            for kpi in kpi_list:
                prev_val, curr_val = prev[kpi], curr[kpi]
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val==0: continue
                pct_change = (curr_val-prev_val)/abs(prev_val)*100
                if abs(pct_change)>=threshold_pct_change:
                    change_records.append({
                        'BM': bm, 'Partner Name_x': partner, 'KPI': kpi,
                        'Year From': prev['FY_Year'], 'Year To': curr['FY_Year'],
                        'Previous Value': round(prev_val,2), 'Current Value': round(curr_val,2),
                        '% Change': round(pct_change,2)
                    })
    return pd.DataFrame(change_records)

def generate_drastic_change_insight(df, level_name=None, level_value=None):
    change_df = detect_drastic_changes(df)
    if change_df.empty:
        return {level_value if level_value else "All": "No significant changes detected."}
    
    bm_insights = {}
    for bm, group in change_df.groupby('BM'):
        combined = f"{level_name}: {level_value if level_value else bm}\n" if level_name else f"BM: {bm}\n"
        top_partner = (
            group.groupby('Partner Name_x')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        for partner in top_partner:
            partner_data = group[group['Partner Name_x'] == partner]
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{partner_data}
Analyze KPI changes using ONLY the provided data. No generic lines or introduction, No assumptions beyond the data.
Write insights for {level_name} in this structure:
1. Largest KPI Change – identify the KPI, partner, and magnitude of change.
2. Reason & Risk – explain why this change happened based on data patterns (e.g., low base, spike/drop) and its business risk.
3. Action – give MF-sales-specific steps for {level_name} (partner engagement, mid-tier activation, continuity plans). Avoid generic advice.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            combined += f"\nPartner: {partner}\n{insight}\n" + ("-"*80) + "\n"
        bm_insights[bm] = combined
    return bm_insights

def generate_focus_area_insight(df, level_name=None, level_value=None):
    focus_records = []
    for (bm, partner), group in df.groupby(['BM','Partner Name_x']):
        partner_row = group.iloc[-1]
        gaps = {}
        for kpi in kpi_list:
            target_cols = [col for col in df.columns if 'Target' in col and any(word in kpi for word in kpi.split())]
            target_val = partner_row[target_cols[0]] if target_cols else np.nan
            gap_pct = ((target_val - partner_row[kpi])/target_val*100 if target_val not in [0, None, np.nan] else 0)
            gaps[kpi] = round(gap_pct,2)
        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x:x[1], reverse=True)
        focus_records.append({'BM':bm,'Partner Name_x':partner,'Focus KPIs':sorted_gaps,'Max Gap':max_gap})
    
    focus_df = pd.DataFrame(focus_records)
    top_bm = focus_df.groupby('BM')['Max Gap'].max().sort_values(ascending=False).head(top_bms).index
    filtered = focus_df[focus_df['BM'].isin(top_bm)]
    top_data = (
        filtered.groupby('BM',group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    
    bm_insights = {}
    for bm, group in top_data.groupby('BM'):
        bm_insights[bm] = {}
        for _, row in group.iterrows():
            partner = row['Partner Name_x']
            focus_kpis = row['Focus KPIs']
            prompt = f"""
{level_name}: {level_value if level_value else bm}
Partner: {partner}
Data:
{focus_kpis}
Analyze KPIs and gaps using ONLY the provided data. No generic lines or introduction, No assumptions beyond the data.
Write insights for {level_name} in this structure:
1. KPIs to fix – identify top underperforming KPIs per partner.
2. Improvement required – quantify the gap and suggest realistic targets.
3. Action plan – provide specific steps for {level_name} to improve performance.
Limit to 150–200 words. Use numbers from the data.
"""
            insight = generator.generate_insight(prompt)
            bm_insights[bm][partner] = insight
    return bm_insights

# ========== ADDITIONAL FUNCTIONS FOR NEW REQUIREMENTS ==========

def generate_active_partners_insight(df, kpi, level_name=None, level_value=None):
    """Generate insights for active partners (binary KPIs like PMS_Active_Partners)"""
    active_texts = {}
    top_bms_list = df.groupby('BM')[kpi].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm]
        
        # Calculate active partners statistics
        total_partners = bm_df['Partner Name_x'].nunique()
        active_partners = bm_df[kpi].sum() if kpi in bm_df.columns else 0
        active_percentage = (active_partners / total_partners * 100) if total_partners > 0 else 0
        
        text = f"{level_name}: {level_value if level_value else bm}\n"
        text += f"KPI: {kpi}\n"
        text += f"Total Partners: {total_partners}\n"
        text += f"Active Partners: {active_partners}\n"
        text += f"Active Percentage: {active_percentage:.2f}%\n\n"
        
        # Get top active partners (those with value = 1)
        if kpi in bm_df.columns:
            active_df = bm_df[bm_df[kpi] == 1]
            text += "Active Partners List:\n"
            for partner in active_df['Partner Name_x'].unique()[:10]:  # Show first 10
                text += f"- {partner}\n"
            if len(active_df['Partner Name_x'].unique()) > 10:
                text += f"... and {len(active_df['Partner Name_x'].unique()) - 10} more\n"
        
        prompt = f"""
{kpi_prompts[kpi]}
Data:
{text}
Generate insights ONLY from the data provided — No generic lines or introduction, No assumptions beyond the data.
Write insights in this exact structure:
1. Participation Level – analyze the percentage of active partners
2. Engagement Analysis – discuss partner engagement and activity
3. Recommendations – suggest ways to increase participation
Limit to 200 words.
"""
        insight = generator.generate_insight(prompt)
        active_texts[bm] = text + "\nLLM Insight:\n" + insight
    
    return active_texts

def generate_correlation_insight(df, kpi1, kpi2, level_name=None, level_value=None):
    """Generate insights for correlation between two KPIs"""
    correlation_texts = {}
    top_bms_list = df.groupby('BM')[kpi1].sum().sort_values(ascending=False).head(top_bms).index.tolist()
    
    for bm in top_bms_list:
        bm_df = df[df['BM']==bm].copy()
        
        # Ensure numeric types
        bm_df[kpi1] = pd.to_numeric(bm_df[kpi1], errors='coerce').fillna(0)
        bm_df[kpi2] = pd.to_numeric(bm_df[kpi2], errors='coerce').fillna(0)
        
        # Calculate correlation
        try:
            correlation = bm_df[[kpi1, kpi2]].corr().iloc[0, 1]
        except:
            correlation = 0
        
        # Calculate combined score as float
        bm_df['Combined_Score'] = bm_df[kpi1] + bm_df[kpi2]
        bm_df['Combined_Score'] = pd.to_numeric(bm_df['Combined_Score'], errors='coerce')
        
        # Group by partner - use sort_values instead of nlargest
        partner_summary = bm_df.groupby('Partner Name_x').agg({
            kpi1: 'sum',
            kpi2: 'sum',
            'Combined_Score': 'sum'
        })
        
        # Sort by combined score and get top partners
        top_combined = partner_summary.sort_values('Combined_Score', ascending=False).head(top_partners)
        
        text = f"{level_name}: {level_value if level_value else bm}\n"
        text += f"Correlation Analysis between {kpi1} and {kpi2}\n"
        text += f"Correlation Coefficient: {correlation:.3f}\n\n"
        
        if correlation > 0.7:
            strength = "Strong positive correlation"
        elif correlation > 0.3:
            strength = "Moderate positive correlation"
        elif correlation > -0.3:
            strength = "Weak or no correlation"
        else:
            strength = "Negative correlation"
        
        text += f"Interpretation: {strength}\n\n"
        
        if not top_combined.empty:
            text += "Top Performers (Both KPIs):\n"
            for partner, row in top_combined.iterrows():
                text += f"- {partner}: {kpi1}={row[kpi1]:,.2f}, {kpi2}={row[kpi2]:,.2f}\n"
        
        prompt = f"""
Analyze the relationship between {kpi1} and {kpi2}.

Data:
{text}
Analyze the provided correlation data ONLY. Do NOT give generic advice or introduction, No assumptions beyond the data.
Write insights for {level_name} in this structure:
1. Relationship Strength – interpret the correlation coefficient
2. Business Implications – explain what this relationship means
3. Actionable Insights – suggest strategies based on this relationship
Limit output to 150–200 words. Use data values explicitly.
"""
        insight = generator.generate_insight(prompt)
        correlation_texts[bm] = text + "\nLLM Insight:\n" + insight
    
    return correlation_texts

# ========== MAIN EXECUTION (UPDATED FOR ALL LEVELS) ==========
final_insights = {}

top_zms = final_df.groupby('ZM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index

for zm in top_zms:
    zm_group = final_df[final_df['ZM'] == zm]
    final_insights[zm] = {}

    # TOP 1 BM under this ZM
    top_bm = zm_group.groupby('BM')[kpi_list].sum().sum(axis=1).sort_values(ascending=False).head(1).index
    bm_df = zm_group[zm_group['BM'].isin(top_bm)]

    # ========== ZM LEVEL INSIGHTS ==========
    final_insights[zm]['ZM_Overview'] = {}
    
    # For each KPI, generate appropriate insights
    for kpi in kpi_list:
        final_insights[zm]['ZM_Overview'][kpi] = {}
        
        # Always generate concentration insights
        final_insights[zm]['ZM_Overview'][kpi]['Partner Concentration'] = generate_partner_concentration_insight(
            bm_df, kpi, level_name="ZM", level_value=zm
        )
        
        # For sales-related KPIs, generate leaders & laggers
        if any(sales_term in kpi for sales_term in ['Sales', 'Achievement', 'Ach', 'Net Sales']):
            final_insights[zm]['ZM_Overview'][kpi]['Leaders & Laggers'] = generate_leaders_laggers_insight(
                bm_df, kpi, level_name="ZM", level_value=zm
            )
        
        # For active partner KPIs, generate active partners insights
        if 'Active_Partners' in kpi:
            final_insights[zm]['ZM_Overview'][kpi]['Active Partners Analysis'] = generate_active_partners_insight(
                bm_df, kpi, level_name="ZM", level_value=zm
            )
        
        # For all except active partners, generate drastic change and focus area
        if 'Active_Partners' not in kpi:
            final_insights[zm]['ZM_Overview'][kpi]['Drastic Change'] = generate_drastic_change_insight(
                bm_df, level_name="ZM", level_value=zm
            )
            final_insights[zm]['ZM_Overview'][kpi]['Areas to Focus'] = generate_focus_area_insight(
                bm_df, level_name="ZM", level_value=zm
            )
    
    # ========== ADDITIONAL CORRELATION INSIGHTS AT ZM LEVEL ==========
    final_insights[zm]['ZM_Overview']['Special_Correlations'] = {
        'Saturday School vs Client Acquisition': generate_correlation_insight(
            bm_df, 
            'Saturday School (YTD) FY 24-25 Q4 YTD',
            'Clients Acquired FY 24-25 Q4 YTD',
            level_name="ZM", 
            level_value=zm
        ),
        'Client Acquisition vs Equity Sales': generate_correlation_insight(
            bm_df,
            'Clients Acquired FY 24-25 Q4 YTD',
            'Equity Net Sales\nFY 24-25 Q4 YTD',
            level_name="ZM",
            level_value=zm
        )
    }

    # ========== BM LEVEL ==========
    for bm in top_bm:
        bm_group = zm_group[zm_group['BM'] == bm]
        final_insights[zm][bm] = {}
        
        # Create BM overview section
        final_insights[zm][bm]['BM_Overview'] = {}
        
        # For each KPI, generate appropriate insights at BM level
        for kpi in kpi_list:
            final_insights[zm][bm]['BM_Overview'][kpi] = {}
            
            # Always generate concentration insights
            final_insights[zm][bm]['BM_Overview'][kpi]['Partner Concentration'] = generate_partner_concentration_insight(
                bm_group, kpi, level_name="BM", level_value=bm
            )
            
            # For sales-related KPIs
            if any(sales_term in kpi for sales_term in ['Sales', 'Achievement', 'Ach', 'Net Sales']):
                final_insights[zm][bm]['BM_Overview'][kpi]['Leaders & Laggers'] = generate_leaders_laggers_insight(
                    bm_group, kpi, level_name="BM", level_value=bm
                )
            
            # For active partner KPIs
            if 'Active_Partners' in kpi:
                final_insights[zm][bm]['BM_Overview'][kpi]['Active Partners Analysis'] = generate_active_partners_insight(
                    bm_group, kpi, level_name="BM", level_value=bm
                )
            
            # For all except active partners
            if 'Active_Partners' not in kpi:
                final_insights[zm][bm]['BM_Overview'][kpi]['Drastic Change'] = generate_drastic_change_insight(
                    bm_group, level_name="BM", level_value=bm
                )
                final_insights[zm][bm]['BM_Overview'][kpi]['Areas to Focus'] = generate_focus_area_insight(
                    bm_group, level_name="BM", level_value=bm
                )
        
        # ========== ADDITIONAL CORRELATION INSIGHTS AT BM LEVEL ==========
        final_insights[zm][bm]['BM_Overview']['Special_Correlations'] = {
            'Saturday School vs Client Acquisition': generate_correlation_insight(
                bm_group, 
                'Saturday School (YTD) FY 24-25 Q4 YTD',
                'Clients Acquired FY 24-25 Q4 YTD',
                level_name="BM", 
                level_value=bm
            ),
            'Client Acquisition vs Equity Sales': generate_correlation_insight(
                bm_group,
                'Clients Acquired FY 24-25 Q4 YTD',
                'Equity Net Sales\nFY 24-25 Q4 YTD',
                level_name="BM",
                level_value=bm
            )
        }

        # ========== TOP 1 RH ==========
        top_rh = (
            bm_group.groupby('Relationship Handler')[kpi_list]
            .sum().sum(axis=1)
            .sort_values(ascending=False)
            .head(1).index
        )

        for rh in top_rh:
            rh_group = bm_group[bm_group['Relationship Handler'] == rh]
            final_insights[zm][bm][rh] = {}
            
            # Create RH overview section
            final_insights[zm][bm][rh]['RH_Overview'] = {}
            
            # For each KPI, generate appropriate insights at RH level
            for kpi in kpi_list:
                final_insights[zm][bm][rh]['RH_Overview'][kpi] = {}
                
                # Always generate concentration insights
                final_insights[zm][bm][rh]['RH_Overview'][kpi]['Partner Concentration'] = generate_partner_concentration_insight(
                    rh_group, kpi, level_name="RH", level_value=rh
                )
                
                # For sales-related KPIs
                if any(sales_term in kpi for sales_term in ['Sales', 'Achievement', 'Ach', 'Net Sales']):
                    final_insights[zm][bm][rh]['RH_Overview'][kpi]['Leaders & Laggers'] = generate_leaders_laggers_insight(
                        rh_group, kpi, level_name="RH", level_value=rh
                    )
                
                # For active partner KPIs
                if 'Active_Partners' in kpi:
                    final_insights[zm][bm][rh]['RH_Overview'][kpi]['Active Partners Analysis'] = generate_active_partners_insight(
                        rh_group, kpi, level_name="RH", level_value=rh
                    )
                
                # For all except active partners
                if 'Active_Partners' not in kpi:
                    final_insights[zm][bm][rh]['RH_Overview'][kpi]['Drastic Change'] = generate_drastic_change_insight(
                        rh_group, level_name="RH", level_value=rh
                    )
                    final_insights[zm][bm][rh]['RH_Overview'][kpi]['Areas to Focus'] = generate_focus_area_insight(
                        rh_group, level_name="RH", level_value=rh
                    )
            
            # ========== ADDITIONAL CORRELATION INSIGHTS AT RH LEVEL ==========
            final_insights[zm][bm][rh]['RH_Overview']['Special_Correlations'] = {
                'Saturday School vs Client Acquisition': generate_correlation_insight(
                    rh_group, 
                    'Saturday School (YTD) FY 24-25 Q4 YTD',
                    'Clients Acquired FY 24-25 Q4 YTD',
                    level_name="RH", 
                    level_value=rh
                ),
                'Client Acquisition vs Equity Sales': generate_correlation_insight(
                    rh_group,
                    'Clients Acquired FY 24-25 Q4 YTD',
                    'Equity Net Sales\nFY 24-25 Q4 YTD',
                    level_name="RH",
                    level_value=rh
                )
            }

print("=" * 80)
print("INSIGHT GENERATION COMPLETED FOR ALL LEVELS!")
print("=" * 80)
print(f"\nGenerated insights for:")
print(f"1. ZM Level: {len(kpi_list)} KPIs + 2 correlation analyses")
print(f"2. BM Level: {len(kpi_list)} KPIs + 2 correlation analyses")
print(f"3. RH Level: {len(kpi_list)} KPIs + 2 correlation analyses")
print(f"\nTotal KPIs analyzed: {len(kpi_list)}")
print(f"Total correlation analyses per level: 2")


  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Gap',ascending=False).head(top_partners))
  .apply(lambda x: x.sort_values('Max Ga

INSIGHT GENERATION COMPLETED FOR ALL LEVELS!

Generated insights for:
1. ZM Level: 13 KPIs + 2 correlation analyses
2. BM Level: 13 KPIs + 2 correlation analyses
3. RH Level: 13 KPIs + 2 correlation analyses

Total KPIs analyzed: 13
Total correlation analyses per level: 2


In [50]:
# ========== PRINT ACTUAL INSIGHTS ==========

print("\n" + "=" * 80)
print("PRINTING ACTUAL INSIGHTS")
print("=" * 80)

# Function to print insights in a readable format
def print_insights_nicely(insights_dict, max_insights_per_kpi=2):
    """Print insights in a clean, readable format"""
    
    for zm_name, zm_data in insights_dict.items():
        print(f"\n\n{'='*80}")
        print(f"ZONAL MANAGER: {zm_name}")
        print(f"{'='*80}")
        
        # ZM Level Insights
        if 'ZM_Overview' in zm_data:
            print(f"\n{'='*60}")
            print("ZM LEVEL OVERVIEW")
            print(f"{'='*60}")
            
            zm_overview = zm_data['ZM_Overview']
            
            # Print each KPI
            for kpi_idx, (kpi_name, kpi_data) in enumerate(zm_overview.items()):
                if kpi_name == 'Special_Correlations':
                    continue  # Handle correlations separately
                    
                print(f"\n{'~'*50}")
                print(f"KPI: {kpi_name}")
                print(f"{'~'*50}")
                
                if isinstance(kpi_data, dict):
                    for analysis_type, analysis_data in kpi_data.items():
                        print(f"\n{analysis_type.upper()}:")
                        print("-" * 40)
                        
                        if isinstance(analysis_data, dict):
                            insight_count = 0
                            for bm, insight in analysis_data.items():
                                if insight_count < max_insights_per_kpi:
                                    print(f"\nBM: {bm}")
                                    print(f"\n{insight}")
                                    insight_count += 1
                                else:
                                    print(f"\n... and {len(analysis_data) - max_insights_per_kpi} more insights")
                                    break
                        else:
                            print(f"\n{analysis_data}")
                
                # Break after first few KPIs to avoid too much output
                if kpi_idx >= 3:  # Show only first 3 KPIs
                    print(f"\n... and {len(zm_overview) - 4} more KPIs")
                    break
            
            # Print Correlation Insights
            if 'Special_Correlations' in zm_overview:
                print(f"\n\n{'~'*50}")
                print("CORRELATION ANALYSES")
                print(f"{'~'*50}")
                
                for corr_name, corr_data in zm_overview['Special_Correlations'].items():
                    print(f"\n{corr_name}:")
                    print("-" * 40)
                    
                    if isinstance(corr_data, dict):
                        for bm, insight in corr_data.items():
                            print(f"\nBM: {bm}")
                            print(f"\n{insight}")
                            break  # Just show first one
                    else:
                        print(f"\n{corr_data}")
        
        # BM Level Insights (just sample)
        print(f"\n\n{'='*60}")
        print("BM LEVEL (SAMPLE)")
        print(f"{'='*60}")
        
        # Find first BM that's not ZM_Overview
        bm_names = [key for key in zm_data.keys() if key != 'ZM_Overview']
        if bm_names:
            first_bm = bm_names[0]
            print(f"\nBranch Manager: {first_bm}")
            
            if 'BM_Overview' in zm_data[first_bm]:
                bm_overview = zm_data[first_bm]['BM_Overview']
                
                # Show sample KPI from BM
                for kpi_name, kpi_data in list(bm_overview.items())[:1]:  # Just first KPI
                    if kpi_name != 'Special_Correlations':
                        print(f"\nKPI: {kpi_name}")
                        
                        if isinstance(kpi_data, dict):
                            for analysis_type, analysis_data in list(kpi_data.items())[:1]:  # First analysis type
                                print(f"\n{analysis_type}:")
                                
                                if isinstance(analysis_data, dict):
                                    for bm, insight in list(analysis_data.items())[:1]:  # First insight
                                        print(f"\n{insight[:500]}..." if len(insight) > 500 else insight)
                                        break
                                else:
                                    print(f"\n{analysis_data}")
                                break
                        break
        
        # RH Level Insights (just sample)
        print(f"\n\n{'='*60}")
        print("RH LEVEL (SAMPLE)")
        print(f"{'='*60}")
        
        if bm_names:
            first_bm = bm_names[0]
            # Find first RH under this BM
            rh_keys = [key for key in zm_data[first_bm].keys() if key != 'BM_Overview']
            if rh_keys:
                first_rh = rh_keys[0]
                print(f"\nRelationship Handler: {first_rh}")
                
                if 'RH_Overview' in zm_data[first_bm][first_rh]:
                    rh_overview = zm_data[first_bm][first_rh]['RH_Overview']
                    
                    # Show sample KPI from RH
                    for kpi_name, kpi_data in list(rh_overview.items())[:1]:  # Just first KPI
                        if kpi_name != 'Special_Correlations':
                            print(f"\nKPI: {kpi_name}")
                            
                            if isinstance(kpi_data, dict):
                                for analysis_type, analysis_data in list(kpi_data.items())[:1]:  # First analysis type
                                    print(f"\n{analysis_type}:")
                                    
                                    if isinstance(analysis_data, dict):
                                        for bm, insight in list(analysis_data.items())[:1]:  # First insight
                                            print(f"\n{insight[:400]}..." if len(insight) > 400 else insight)
                                            break
                                    else:
                                        print(f"\n{analysis_data}")
                                    break
                            break
        
        # Show summary
        print(f"\n\n{'='*80}")
        print(f"SUMMARY FOR {zm_name}")
        print(f"{'='*80}")
        
        total_insights = 0
        insight_types = {}
        
        # Count ZM insights
        if 'ZM_Overview' in zm_data:
            zm_overview = zm_data['ZM_Overview']
            for kpi_name, kpi_data in zm_overview.items():
                if kpi_name != 'Special_Correlations':
                    if isinstance(kpi_data, dict):
                        for analysis_type, analysis_data in kpi_data.items():
                            if isinstance(analysis_data, dict):
                                count = len(analysis_data)
                                insight_types[analysis_type] = insight_types.get(analysis_type, 0) + count
                                total_insights += count
        
        print(f"\nTotal Insights Generated at ZM Level: {total_insights}")
        print("\nInsight Types:")
        for insight_type, count in insight_types.items():
            print(f"  - {insight_type}: {count}")
        
        break  # Only show first ZM

# Print the insights
print_insights_nicely(final_insights, max_insights_per_kpi=2)

# ========== EXPORT TO FILE ==========

print("\n\n" + "=" * 80)
print("EXPORTING TO FILE")
print("=" * 80)

def export_all_insights(insights_dict, filename="all_insights.txt"):
    """Export all insights to a text file"""
    with open(filename, 'w', encoding='utf-8') as f:
        f.write("=" * 80 + "\n")
        f.write("COMPLETE INSIGHTS REPORT\n")
        f.write("=" * 80 + "\n\n")
        
        for zm_idx, (zm_name, zm_data) in enumerate(insights_dict.items()):
            f.write(f"\n{'='*80}\n")
            f.write(f"ZONAL MANAGER {zm_idx+1}: {zm_name}\n")
            f.write(f"{'='*80}\n\n")
            
            # ZM Level
            if 'ZM_Overview' in zm_data:
                f.write("ZM LEVEL INSIGHTS\n")
                f.write("-" * 40 + "\n\n")
                
                zm_overview = zm_data['ZM_Overview']
                
                # Print all KPIs
                for kpi_name, kpi_data in zm_overview.items():
                    if kpi_name == 'Special_Correlations':
                        continue  # Handle separately
                        
                    f.write(f"\nKPI: {kpi_name}\n")
                    f.write("=" * 40 + "\n\n")
                    
                    if isinstance(kpi_data, dict):
                        for analysis_type, analysis_data in kpi_data.items():
                            f.write(f"{analysis_type.upper()}:\n")
                            f.write("-" * 30 + "\n")
                            
                            if isinstance(analysis_data, dict):
                                for bm, insight in analysis_data.items():
                                    f.write(f"\nBM: {bm}\n")
                                    f.write(f"{insight}\n")
                                    f.write("-" * 60 + "\n")
                            else:
                                f.write(f"\n{analysis_data}\n")
                                f.write("-" * 60 + "\n")
                    
                    f.write("\n\n")
                
                # Correlations
                if 'Special_Correlations' in zm_overview:
                    f.write("\nCORRELATION ANALYSES\n")
                    f.write("=" * 40 + "\n\n")
                    
                    for corr_name, corr_data in zm_overview['Special_Correlations'].items():
                        f.write(f"{corr_name}:\n")
                        f.write("-" * 30 + "\n\n")
                        
                        if isinstance(corr_data, dict):
                            for bm, insight in corr_data.items():
                                f.write(f"BM: {bm}\n")
                                f.write(f"{insight}\n")
                                f.write("-" * 60 + "\n\n")
                        else:
                            f.write(f"{corr_data}\n")
                            f.write("-" * 60 + "\n\n")
            
            # BM Levels
            for bm_name, bm_data in zm_data.items():
                if bm_name != 'ZM_Overview' and 'BM_Overview' in bm_data:
                    f.write(f"\n\n{'='*80}\n")
                    f.write(f"BRANCH MANAGER: {bm_name}\n")
                    f.write(f"{'='*80}\n\n")
                    
                    bm_overview = bm_data['BM_Overview']
                    
                    # Sample of BM insights
                    for kpi_name, kpi_data in list(bm_overview.items())[:3]:  # First 3 KPIs
                        if kpi_name != 'Special_Correlations':
                            f.write(f"\nKPI: {kpi_name}\n")
                            f.write("-" * 30 + "\n\n")
                            
                            if isinstance(kpi_data, dict):
                                for analysis_type, analysis_data in list(kpi_data.items())[:2]:  # First 2 analysis types
                                    f.write(f"{analysis_type}:\n")
                                    f.write("-" * 20 + "\n")
                                    
                                    if isinstance(analysis_data, dict):
                                        for bm_code, insight in list(analysis_data.items())[:1]:  # First insight
                                            f.write(f"\n{insight}\n")
                                            f.write("-" * 50 + "\n")
                                            break
                                    f.write("\n")
                    
                    # RH Levels under this BM
                    for rh_name, rh_data in bm_data.items():
                        if rh_name != 'BM_Overview' and 'RH_Overview' in rh_data:
                            f.write(f"\n\nRelationship Handler: {rh_name}\n")
                            f.write("-" * 40 + "\n\n")
                            
                            rh_overview = rh_data['RH_Overview']
                            
                            # Sample from RH
                            for kpi_name, kpi_data in list(rh_overview.items())[:2]:  # First 2 KPIs
                                if kpi_name != 'Special_Correlations':
                                    f.write(f"\nKPI: {kpi_name}\n")
                                    f.write("-" * 20 + "\n\n")
                                    
                                    if isinstance(kpi_data, dict):
                                        for analysis_type, analysis_data in list(kpi_data.items())[:1]:  # First analysis
                                            f.write(f"{analysis_type}:\n")
                                            f.write("-" * 15 + "\n")
                                            
                                            if isinstance(analysis_data, dict):
                                                for bm_code, insight in list(analysis_data.items())[:1]:  # First insight
                                                    f.write(f"\n{insight}\n")
                                                    break
    
    print(f"✓ All insights exported to: {filename}")

# Export to file
export_all_insights(final_insights, "complete_insights_report.txt")

# ========== QUICK ACCESS TO SPECIFIC INSIGHTS ==========

print("\n\n" + "=" * 80)
print("QUICK ACCESS TO INSIGHTS")
print("=" * 80)

# Get first ZM name
zm_name = list(final_insights.keys())[0] if final_insights else None

if zm_name:
    print(f"\nTo access insights programmatically:")
    print(f"\n1. Get ZM name: zm_name = list(final_insights.keys())[0]")
    print(f"   Current ZM: {zm_name}")
    
    # Show structure
    print(f"\n2. Structure of final_insights:")
    print(f"   final_insights['{zm_name}']['ZM_Overview'] - ZM level insights")
    
    # List available KPIs
    if 'ZM_Overview' in final_insights[zm_name]:
        zm_overview = final_insights[zm_name]['ZM_Overview']
        print(f"\n3. Available KPIs at ZM level:")
        for kpi in list(zm_overview.keys())[:5]:  # Show first 5
            print(f"   - {kpi}")
        
        # Example: Show Equity Sales is available
        if 'Equity Sales' in zm_overview:
            print(f"\n4. Example - Access Equity Sales concentration:")
            print(f"   equity_conc = final_insights['{zm_name}']['ZM_Overview']['Equity Sales']['Partner Concentration']")
            
            # Get first BM from this insight
            equity_data = zm_overview['Equity Sales']['Partner Concentration']
            if equity_data:
                first_bm = list(equity_data.keys())[0]
                insight_preview = equity_data[first_bm]
                print(f"\n   Sample (first 200 chars):")
                print(f"   '{insight_preview[:200]}...'")
    
    print(f"\n5. Full structure:")
    print(f"   final_insights['ZM_NAME']['ZM_Overview']['KPI_NAME']['ANALYSIS_TYPE']['BM_CODE']")
    print(f"\n   Example: final_insights['{zm_name}']['ZM_Overview']['Equity Sales']['Partner Concentration']['{first_bm if 'first_bm' in locals() else 'BM_001'}']")

print("\n" + "=" * 80)
print("INSIGHT GENERATION COMPLETE!")
print("=" * 80)


PRINTING ACTUAL INSIGHTS


ZONAL MANAGER: SARFARAZ ABDULLA PATEL

ZM LEVEL OVERVIEW

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
KPI: Equity Sales
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

PARTNER CONCENTRATION:
----------------------------------------

BM: RAVIKIRAN GOVIND MORE

ZM: SARFARAZ ABDULLA PATEL
KPI: Equity Sales
Total Partners: 345
- Top 4 partners (1.16%) contribute 25% of total Equity Sales.
- Top 17 partners (4.93%) contribute 50% of total Equity Sales.
- Top 46 partners (13.33%) contribute 75% of total Equity Sales.
- Top 55 partners (15.94%) contribute 80% of total Equity Sales.
- Top 80 partners (23.19%) contribute 90% of total Equity Sales.

Top Partner:
- Shilpa Umesh Bagdi: 1,044,004,873.69 (8.34%)

LLM Insight:
1. **Concentration level:** A high concentration of Equity Sales is evident, with the top 1.16% of partners driving 25% of total sales, escalating to 90% of sales generated by the top 23.19% of partners. One partner, Shilpa Umesh Bagdi, co

###sql - code (((NEW)))

In [98]:
import pandas as pd
import re
from sqlalchemy import create_engine

# MySQL connection
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# -------------------------------
# EXTRACT PARTNER NAMES FROM TEXT
# -------------------------------
def extract_partner_names(text):
    if not isinstance(text, str):
        return []
    matches = re.findall(r"-\s*([A-Za-z0-9 .()&]+):", text)
    return [m.strip() for m in matches if m.strip()]

# -------------------------------
# FLATTEN final_insights INTO ROWS
# -------------------------------
def flatten_insights(final_insights):
    records = []

    for zm_name, zm_block in final_insights.items():
        for second_level_key, block in zm_block.items():
            
            # ZM Overview
            if second_level_key == "ZM_Overview":
                bm = None
            else:
                bm = second_level_key

            if not isinstance(block, dict):
                continue

            # Loop through KPIs inside the block
            for kpi_name, kpi_block in block.items():
                if not isinstance(kpi_block, dict):
                    continue

                for insight_type, partner_block in kpi_block.items():
                    if isinstance(partner_block, dict):
                        # partner_block can have multiple partner names
                        for partner_name, insight_text in partner_block.items():
                            # Leaf text might be str
                            if isinstance(insight_text, str):
                                records.append({
                                    "ZM": zm_name,
                                    "BM": bm,
                                    "RH": None,
                                    "Partner_Name": partner_name,
                                    "kpi_insights": kpi_name,
                                    "Insight_Type": insight_type,
                                    "Insights": insight_text
                                })
                            elif isinstance(insight_text, dict):
                                # If nested dict (like Areas to Focus -> partner -> RH)
                                for rh_name, leaf_text in insight_text.items():
                                    records.append({
                                        "ZM": zm_name,
                                        "BM": bm,
                                        "RH": rh_name,
                                        "Partner_Name": partner_name,
                                        "kpi_insights": kpi_name,
                                        "Insight_Type": insight_type,
                                        "Insights": leaf_text
                                    })
                    elif isinstance(partner_block, str):
                        # If partner_block is a string, no partner name
                        records.append({
                            "ZM": zm_name,
                            "BM": bm,
                            "RH": None,
                            "Partner_Name": None,
                            "kpi_insights": kpi_name,
                            "Insight_Type": insight_type,
                            "Insights": partner_block
                        })

    df = pd.DataFrame(records)
    return df

# -------------------------------
# SAVE TO SQL
# -------------------------------
def save_final_insights_to_sql(final_insights, table_name="insights_table"):
    df = flatten_insights(final_insights)
    print("📌 Sample rows:")
    print(df.head())
    if df.empty:
        print("⚠️ No insights found to save!")
        return
    df.to_sql(table_name, con=engine, index=False, if_exists="append")
    print(f"✅ {len(df)} insights saved successfully.")


In [99]:
print(pd.read_sql("SELECT * FROM insights_table LIMIT 5;", con=engine))


Empty DataFrame
Columns: [Zm, BM, Rh, Partner_Name, kpi_insights, Insight_Type, Insights]
Index: []


In [97]:
def print_structure(d, indent=0):
    """
    Recursively print the structure of a nested dict/list.
    """
    prefix = "    " * indent
    if isinstance(d, dict):
        for k, v in d.items():
            print(f"{prefix}- {k} ({type(v).__name__})")
            print_structure(v, indent + 1)
    elif isinstance(d, list):
        print(f"{prefix}- list[{len(d)}]")
        for i, item in enumerate(d):
            print(f"{prefix}  [{i}] ({type(item).__name__})")
            print_structure(item, indent + 2)
    else:
        # leaf value
        print(f"{prefix}- {d} ({type(d).__name__})")

# Example usage
print("Top-level keys in final_insights:", list(final_insights.keys()))
for zm, zm_block in final_insights.items():
    print(f"\nZM: {zm}")
    print_structure(zm_block)


Top-level keys in final_insights: ['SARFARAZ ABDULLA PATEL']

ZM: SARFARAZ ABDULLA PATEL
- ZM_Overview (dict)
    - Equity Sales (dict)
        - Partner Concentration (dict)
            - AJINKYA BHIMRAO GURAV (str)
                - ZM: SARFARAZ ABDULLA PATEL
KPI: Equity Sales
Total Partners: 396
- Top 5 partners (1.26%) contribute 25% of total Equity Sales.
- Top 13 partners (3.28%) contribute 50% of total Equity Sales.
- Top 38 partners (9.6%) contribute 75% of total Equity Sales.
- Top 47 partners (11.87%) contribute 80% of total Equity Sales.
- Top 76 partners (19.19%) contribute 90% of total Equity Sales.

Top Partner:
- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)

LLM Insight:
Here's an analysis of ZM's Equity Sales performance based on the provided data: 1. **Concentration level**: A significant portion of Equity Sales is concentrated among a small percentage of partners. The top 5 partners contribute 25%, and the top 13 partners contribute 50% of total Equity Sales. ARVIND S LA

In [61]:
for zm, block in final_insights.items():
    print("\n==== ZM:", zm, "====")
    print("Keys inside ZM:", list(block.keys()))

    for k, v in block.items():
        print(f"\n--- Key: {k} --- Type: {type(v)}")
        if isinstance(v, dict):
            print("  Subkeys:", list(v.keys())[:5])



==== ZM: SARFARAZ ABDULLA PATEL ====
Keys inside ZM: ['ZM_Overview', 'AJINKYA BHIMRAO GURAV']

--- Key: ZM_Overview --- Type: <class 'dict'>
  Subkeys: ['Equity Sales', 'SIP Sales Achievement', 'Net Sales through MARS', 'Investment Net Sales Achievement']

--- Key: AJINKYA BHIMRAO GURAV --- Type: <class 'dict'>
  Subkeys: ['Equity Sales', 'SIP Sales Achievement', 'Net Sales through MARS', 'Investment Net Sales Achievement', 'KAVIRAJ V. CHAVAN']


In [63]:
import json

def deep_print(d, level=0):
    indent = "    " * level
    if isinstance(d, dict):
        for k, v in d.items():
            print(f"{indent}{k}: {type(v)}")
            deep_print(v, level+1)
    elif isinstance(d, list):
        print(f"{indent}LIST ({len(d)} items)")
        for i, item in enumerate(d[:3]):
            print(f"{indent}  [{i}] -> {type(item)}")
            deep_print(item, level+1)
    else:
        print(f"{indent}VALUE: {repr(d)}")

print("\n============== FULL STRUCTURE INSIDE final_insights ==============")
deep_print(final_insights)



SARFARAZ ABDULLA PATEL: <class 'dict'>
    ZM_Overview: <class 'dict'>
        Equity Sales: <class 'dict'>
            Partner Concentration: <class 'dict'>
                AJINKYA BHIMRAO GURAV: <class 'str'>
                    VALUE: "ZM: SARFARAZ ABDULLA PATEL\nKPI: Equity Sales\nTotal Partners: 396\n- Top 5 partners (1.26%) contribute 25% of total Equity Sales.\n- Top 13 partners (3.28%) contribute 50% of total Equity Sales.\n- Top 38 partners (9.6%) contribute 75% of total Equity Sales.\n- Top 47 partners (11.87%) contribute 80% of total Equity Sales.\n- Top 76 partners (19.19%) contribute 90% of total Equity Sales.\n\nTop Partner:\n- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)\n\nLLM Insight:\nHere's an analysis of ZM's Equity Sales performance based on the provided data: 1. **Concentration level**: A significant portion of Equity Sales is concentrated among a small percentage of partners. The top 5 partners contribute 25%, and the top 13 partners contribute 50% of total Equity 

In [64]:
print(len(final_insights))


1


In [58]:
pretty_print_insights(final_insights)


SARFARAZ ABDULLA PATEL:

    ZM_Overview:

        Equity Sales:

            Partner Concentration:
                - AJINKYA BHIMRAO GURAV: ZM: SARFARAZ ABDULLA PATEL
KPI: Equity Sales
Total Partners: 396
- Top 5 partners (1.26%) contribute 25% of total Equity Sales.
- Top 13 partners (3.28%) contribute 50% of total Equity Sales.
- Top 38 partners (9.6%) contribute 75% of total Equity Sales.
- Top 47 partners (11.87%) contribute 80% of total Equity Sales.
- Top 76 partners (19.19%) contribute 90% of total Equity Sales.

Top Partner:
- ARVIND S LATKAR: 1,103,469,217.74 (8.29%)

LLM Insight:
Here's an analysis of ZM's Equity Sales performance based on the provided data: 1. **Concentration level**: A significant portion of Equity Sales is concentrated among a small percentage of partners. The top 5 partners contribute 25%, and the top 13 partners contribute 50% of total Equity Sales. ARVIND S LATKAR alone generates 8.29% of total sales. 2. **Risk/impact**: High reliance on a small grou

In [111]:
###sql-codes

In [63]:
import pandas as pd
import re
from sqlalchemy import create_engine

# MySQL Connection
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# -------------------------------------------------------------------------
# 🔍 Extract Partner Names (Top Partners + Leaders + Laggers)
# -------------------------------------------------------------------------

def extract_partner_names(text):
    partners = []

    # --- Top Partners ---
    top_matches = re.findall(r"-\s*([A-Za-z0-9 .()&]+):", text)
    partners.extend(top_matches)

    # --- Leaders List ---
    if "Leaders:" in text:
        leaders_block = re.search(r"Leaders:(.*?)Laggers:", text, re.S)
        if leaders_block:
            leaders = re.findall(r"-\s*([A-Za-z0-9 .()&]+)", leaders_block.group(1))
            partners.extend(leaders)

    # --- Laggers List ---
    if "Laggers:" in text:
        laggers_block = re.search(r"Laggers:(.*)", text, re.S)
        if laggers_block:
            laggers = re.findall(r"-\s*([A-Za-z0-9 .()&]+)", laggers_block.group(1))
            partners.extend(laggers)

    # Clean and remove blanks
    partners = [p.strip() for p in partners if p.strip()]
    return partners


# -------------------------------------------------------------------------
# 🔄 MAIN SAVE FUNCTION
# -------------------------------------------------------------------------

def save_all_insights(results, table_name="insights_table"):

    records = []

    for level, level_dict in results.items():

        print(f"\n🔵 Processing Level: {level}")

        if not isinstance(level_dict, dict):
            continue

        if "kpi_insights" not in level_dict:
            print(f"❌ No kpi_insights under {level}")
            continue

        kpi_block = level_dict["kpi_insights"]

        for kpi_name, kpi_dict in kpi_block.items():

            print(f"   🔸 KPI: {kpi_name}")

            for insight_type, partner_dict in kpi_dict.items():

                print(f"      🔹 Insight Type: {insight_type}")

                for owner_name, insight_text in partner_dict.items():

                    print(f"         ✔ Owner: {owner_name}")

                    # Extract actual partner names
                    partner_list = extract_partner_names(insight_text)

                    if not partner_list:
                        print("⚠ No partner names extracted! Saving with owner name only.")
                        partner_list = [owner_name]

                    for partner in partner_list:

                        row = {
                            "ZM": owner_name if level == "ZM" else None,
                            "BM": owner_name if level == "BM" else None,
                            "RH": owner_name if level == "Relationship Handler" else None,

                            "Partner_Name": partner,           # << CORRECT PARTNER
                            "kpi_insights": kpi_name,
                            "Insight_Type": insight_type,
                            "Insights": insight_text
                        }

                        records.append(row)

    # Nothing extracted
    if not records:
        print("❌ No insights found!")
        return

    # Convert to DataFrame
    df = pd.DataFrame(records)
    print("\n📌 First 5 rows to insert:")
    print(df.head())

    # Save to MySQL
    try:
        df.to_sql(table_name, engine, if_exists="append", index=False)
        print(f"\n✅ Saved {len(df)} rows into table {table_name}")
    except Exception as e:
        print("\n❌ SQL Error:", e)


In [50]:
import re
import pandas as pd
from sqlalchemy import create_engine

# ---------------- MYSQL CONNECTION ----------------
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# ---------------- PARTNER NAME EXTRACTION ----------------
def extract_partner_name(text):
    if not isinstance(text, str):
        return "Unknown Partner"

    # Pattern 1: "Partner: ABC"
    match_partner = re.search(r"Partner:\s*([A-Za-z\s\.\-&']+)", text)
    if match_partner:
        return match_partner.group(1).strip()

    # Pattern 2: "- ABC:" inside bullet list
    partner_matches = re.findall(r"-\s*([A-Za-z0-9&\.\'\s]+):", text)
    if partner_matches:
        return partner_matches[0].strip()

    return "Unknown Partner"


# ---------------- FLATTEN & SAVE FUNCTION ----------------
def save_full_insights(insight_dict, table_name="insights_table"):
    records = []

    for zm, zm_block in insight_dict.items():

        # ---------- LEVEL 1 → ZM ----------
        if isinstance(zm_block, dict):
            for bm, bm_block in zm_block.items():

                # ---------- LEVEL 2 → BM ----------
                if isinstance(bm_block, dict):
                    for rh, rh_block in bm_block.items():

                        # ---------- LEVEL 3 → RH ----------
                        if isinstance(rh_block, dict):
                            for partner, insight_obj in rh_block.items():

                                # Actual insight extraction
                                if isinstance(insight_obj, dict):
                                    insight_type = insight_obj.get("type", "General_Insight")
                                    insight_text = insight_obj.get("insight", "")
                                else:
                                    insight_type = "General_Insight"
                                    insight_text = str(insight_obj)

                                final_partner = extract_partner_name(insight_text) or partner

                                records.append({
                                    "ZM": zm,
                                    "BM": bm,
                                    "RH": rh,
                                    "Partner_Name": final_partner,
                                    "Insight_Type": insight_type,
                                    "Insights": insight_text
                                })

                        else:
                            # RH contains direct insight text
                            insight_text = str(rh_block)
                            final_partner = extract_partner_name(insight_text)

                            records.append({
                                "ZM": zm,
                                "BM": bm,
                                "RH": rh,
                                "Partner_Name": final_partner,
                                "Insight_Type": "General_Insight",
                                "Insights": insight_text
                            })

                else:
                    # BM contains direct insight
                    insight_text = str(bm_block)
                    final_partner = extract_partner_name(insight_text)

                    records.append({
                        "ZM": zm,
                        "BM": bm,
                        "RH": "",
                        "Partner_Name": final_partner,
                        "Insight_Type": "General_Insight",
                        "Insights": insight_text
                    })

        else:
            # ZM contains direct insight
            insight_text = str(zm_block)
            final_partner = extract_partner_name(insight_text)

            records.append({
                "ZM": zm,
                "BM": "",
                "RH": "",
                "Partner_Name": final_partner,
                "Insight_Type": "General_Insight",
                "Insights": insight_text
            })

    # Convert to DataFrame
    df_to_save = pd.DataFrame(records)

    # Save to MySQL
    try:
        df_to_save.to_sql(
            name=table_name,
            con=engine,
            if_exists="append",
            index=False
        )
        print(f"Saved {len(df_to_save)} insights successfully to {table_name}.")

        unknown = df_to_save['Partner_Name'].eq('Unknown Partner').sum()
        print(f"Unknown Partners: {unknown}")

    except Exception as e:
        print(f"Error saving insights to MySQL: {e}")


In [51]:
import json
print(json.dumps(results, indent=4)[:2000])


{
    "ZM": {
        "kpi_insights": {
            "Equity Sales": {
                "concentration": {
                    "SARFARAZ ABDULLA PATEL": "ZM: SARFARAZ ABDULLA PATEL\nKPI: Equity Sales\nTotal Partners: 8835\n\n- Top 93 partners (1.05%) contribute 25% of total Equity Sales.\n- Top 357 partners (4.04%) contribute 50% of total Equity Sales.\n- Top 930 partners (10.53%) contribute 75% of total Equity Sales.\n- Top 1122 partners (12.7%) contribute 80% of total Equity Sales.\n- Top 1658 partners (18.77%) contribute 90% of total Equity Sales.\n\nTop 2 Partners:\n- JITENDRA HIRALAL SHAH: 2,390,954,466.20 (1.14% cumulative)\n- Haardik Nayak Financial Products Distribution Private Limited: 1,762,308,281.51 (1.97% cumulative)\n\n\nLLM Insights:\nOkay, let's analyze the partner concentration data for ZM 'SARFARAZ ABDULLA PATEL' regarding Equity Sales.\n\n**1. Analysis of Performance Concentration:**\n\nThe data clearly indicates a **high degree of concentration** in Equity Sales perfo

In [53]:
print("TOP LEVEL KEYS:", results.keys())

for level in results:
    print("\nLEVEL:", level)
    print("Keys under level:", results[level].keys())

    if isinstance(results[level], dict):
        for sub in results[level]:
            print("   SUBKEY:", sub, "| TYPE:", type(results[level][sub]))


TOP LEVEL KEYS: dict_keys(['ZM', 'BM', 'Relationship Handler'])

LEVEL: ZM
Keys under level: dict_keys(['kpi_insights', 'drastic_changes', 'focus_areas'])
   SUBKEY: kpi_insights | TYPE: <class 'dict'>
   SUBKEY: drastic_changes | TYPE: <class 'dict'>
   SUBKEY: focus_areas | TYPE: <class 'dict'>

LEVEL: BM
Keys under level: dict_keys(['kpi_insights', 'drastic_changes', 'focus_areas'])
   SUBKEY: kpi_insights | TYPE: <class 'dict'>
   SUBKEY: drastic_changes | TYPE: <class 'dict'>
   SUBKEY: focus_areas | TYPE: <class 'dict'>

LEVEL: Relationship Handler
Keys under level: dict_keys(['kpi_insights', 'drastic_changes', 'focus_areas'])
   SUBKEY: kpi_insights | TYPE: <class 'dict'>
   SUBKEY: drastic_changes | TYPE: <class 'dict'>
   SUBKEY: focus_areas | TYPE: <class 'dict'>


In [None]:
`

In [27]:
import pandas as pd
from sqlalchemy import create_engine

# MySQL engine
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# Recursive function to flatten insights
def flatten_insights(data, zm=None, bm=None, rh=None, records=None):
    if records is None:
        records = []

    for key, value in data.items():
        # Check level based on type of value
        if isinstance(value, dict):
            # Determine level
            if key in ["ZM", "BM", "Relationship Handler"]:
                flatten_insights(value, zm=key, bm=None, rh=None, records=records)
            elif bm is None:
                flatten_insights(value, zm=zm, bm=key, rh=None, records=records)
            elif rh is None:
                flatten_insights(value, zm=zm, bm=bm, rh=key, records=records)
            else:
                flatten_insights(value, zm=zm, bm=bm, rh=rh, records=records)
        else:
            # Leaf node → store record
            records.append({
                "ZM": zm,
                "BM": bm,
                "RH": rh,
                "Partner_Name": key if rh else None,
                "Insight_Type": value.get("type", "General_Insight") if isinstance(value, dict) else "General_Insight",
                "Insights": value if isinstance(value, str) else ""
            })
    return records

# Flatten your results dictionary
records = flatten_insights(results)

# Convert to DataFrame
df_to_save = pd.DataFrame(records)

# Save to MySQL
df_to_save.to_sql(
    name="insights_table",
    con=engine,
    if_exists="append",
    index=False
)
print(f"Saved {len(df_to_save)} insights successfully.")


Saved 84 insights successfully.


In [22]:
save_full_insights(results)


Error saving insights to MySQL: (pymysql.err.OperationalError) (1054, "Unknown column 'Insights' in 'field list'")
[SQL: INSERT INTO insights_table (`Zm`, `BM`, `Rh`, `Partner_Name`, `Insight_Type`, `Insights`) VALUES (%(Zm)s, %(BM)s, %(Rh)s, %(Partner_Name)s, %(Insight_Type)s, %(Insights)s)]
[parameters: [{'Zm': 'ZM', 'BM': 'kpi_insights', 'Rh': 'Equity Sales', 'Partner_Name': 'Unknown Partner', 'Insight_Type': 'General_Insight', 'Insights': ''}, {'Zm': 'ZM', 'BM': 'kpi_insights', 'Rh': 'Equity Sales', 'Partner_Name': 'Unknown Partner', 'Insight_Type': 'General_Insight', 'Insights': ''}, {'Zm': 'ZM', 'BM': 'kpi_insights', 'Rh': 'SIP Sales_Achievement', 'Partner_Name': 'Unknown Partner', 'Insight_Type': 'General_Insight', 'Insights': ''}, {'Zm': 'ZM', 'BM': 'kpi_insights', 'Rh': 'SIP Sales_Achievement', 'Partner_Name': 'Unknown Partner', 'Insight_Type': 'General_Insight', 'Insights': ''}, {'Zm': 'ZM', 'BM': 'kpi_insights', 'Rh': 'Net Sales through MARS', 'Partner_Name': 'Unknown Partne

In [19]:
import re
import pandas as pd
from sqlalchemy import create_engine

# ==================== MYSQL CONNECTION ====================
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# ==================== PARTNER NAME EXTRACTION ====================
def extract_partner_name(text):
    if not isinstance(text, str):
        return "Unknown Partner"

    match_partner = re.search(r"Partner:\s*([A-Za-z\s\.\-&']+)", text)
    if match_partner:
        return match_partner.group(1).strip()

    match_section = re.search(r"Top\s+\d+\s+Partners.*?:([\s\S]*)", text)
    if match_section:
        section_text = match_section.group(1)
        partner_matches = re.findall(r"-\s*([A-Za-z0-9&\.\'\s]+):", section_text)
        if partner_matches:
            return partner_matches[0].strip()

    return "Unknown Partner"

# ==================== SAVE FUNCTION ====================
def save_full_insights(insight_dict, table_name="insights_table"):
    """
    Flatten the nested hierarchy (ZM → BM → RH → Partner → Insights)
    and save each insight as a row into MySQL.
    """
    records = []

    for zm, zm_dict in insight_dict.items():
        for bm, bm_dict in zm_dict.items():
            # bm_dict may contain RHs or direct insights
            if isinstance(bm_dict, dict):
                for rh, rh_dict in bm_dict.items():
                    # rh_dict may contain partner insights
                    if isinstance(rh_dict, dict):
                        for partner_name, insight_obj in rh_dict.items():
                            if isinstance(insight_obj, dict):
                                # Extract type and actual insight
                                insight_type = insight_obj.get("type", "General_Insight")
                                insight_text = insight_obj.get("insight", "")
                            else:
                                insight_type = "General_Insight"
                                insight_text = str(insight_obj)

                            # Extract partner name if possible
                            extracted_partner = extract_partner_name(insight_text) or partner_name

                            records.append({
                                "Zm": zm,
                                "BM": bm,
                                "Rh": rh,
                                "Partner_Name": extracted_partner,
                                "Insight_Type": insight_type,
                                "Insights": insight_text
                            })
                    else:
                        # RH dict is a string / insight
                        extracted_partner = extract_partner_name(str(rh_dict))
                        records.append({
                            "Zm": zm,
                            "BM": bm,
                            "Rh": rh,
                            "Partner_Name": extracted_partner,
                            "Insight_Type": "General_Insight",
                            "Insights": str(rh_dict)
                        })
            else:
                # BM dict is a string / insight
                extracted_partner = extract_partner_name(str(bm_dict))
                records.append({
                    "Zm": zm,
                    "BM": bm,
                    "Rh": "",
                    "Partner_Name": extracted_partner,
                    "Insight_Type": "General_Insight",
                    "Insights": str(bm_dict)
                })

    # Convert to DataFrame
    df_to_save = pd.DataFrame(records)

    # Save to MySQL
    try:
        df_to_save.to_sql(
            name=table_name,
            con=engine,
            if_exists="append",
            index=False
        )
        print(f"Saved {len(df_to_save)} insights successfully to {table_name}.")
        unknown = df_to_save['Partner_Name'].eq('Unknown Partner').sum()
        print(f"Unknown Partners: {unknown}")
    except Exception as e:
        print(f"Error saving insights to MySQL: {e}")


In [20]:
save_full_insights(results)


Error saving insights to MySQL: (pymysql.err.OperationalError) (1054, "Unknown column 'Insights' in 'field list'")
[SQL: INSERT INTO insights_table (`Zm`, `BM`, `Rh`, `Partner_Name`, `Insight_Type`, `Insights`) VALUES (%(Zm)s, %(BM)s, %(Rh)s, %(Partner_Name)s, %(Insight_Type)s, %(Insights)s)]
[parameters: [{'Zm': 'ZM', 'BM': 'kpi_insights', 'Rh': 'Equity Sales', 'Partner_Name': 'Unknown Partner', 'Insight_Type': 'General_Insight', 'Insights': ''}, {'Zm': 'ZM', 'BM': 'kpi_insights', 'Rh': 'Equity Sales', 'Partner_Name': 'Unknown Partner', 'Insight_Type': 'General_Insight', 'Insights': ''}, {'Zm': 'ZM', 'BM': 'kpi_insights', 'Rh': 'SIP Sales_Achievement', 'Partner_Name': 'Unknown Partner', 'Insight_Type': 'General_Insight', 'Insights': ''}, {'Zm': 'ZM', 'BM': 'kpi_insights', 'Rh': 'SIP Sales_Achievement', 'Partner_Name': 'Unknown Partner', 'Insight_Type': 'General_Insight', 'Insights': ''}, {'Zm': 'ZM', 'BM': 'kpi_insights', 'Rh': 'Net Sales through MARS', 'Partner_Name': 'Unknown Partne

In [39]:
!pip install mysql-connector-python





[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip





In [None]:
import mysql.connector

conn = mysql.connector.connect(
    host="localhost",
    user="root",
    password="1234",
    database="insights"
)

cursor = conn.cursor()


In [29]:
def get_top_managers(results):
    top_managers = {}

    for level in ["ZM", "BM", "Relationship Handler"]:
        if level in results:
            managers_set = set()
            kpi_insights = results[level].get("kpi_insights", {})
            for kpi, kpi_block in kpi_insights.items():
                # concentration keys
                conc = kpi_block.get("concentration", {})
                managers_set.update(conc.keys())
                # leaders_laggers keys
                ll = kpi_block.get("leaders_laggers", {})
                managers_set.update(ll.keys())
            top_managers[level] = list(managers_set)
        else:
            top_managers[level] = []

    return top_managers

# Example usage:
top_managers = get_top_managers(results)
print(top_managers)


{'ZM': ['PRASHANT ANANTRAI KAKKAD', 'SARFARAZ ABDULLA PATEL', 'HIMANSHU BHARAT MEHTA'], 'BM': ['SAMEER PAKALE', 'CHIRAG J KOTHARI', 'RAVI KACHIWALA', 'SARFARAJ YAFAI', 'ADNAN DHILAWALA'], 'Relationship Handler': ['Realnet Wealth Creators Llp', 'VA Financial Products Distribution LLP', 'Maher Ratan Dhamodiwala', 'Dinesh Suvarna']}


In [30]:
def get_top_hierarchy_with_partners(results):
    hierarchy_partners = {}

    for level in ["ZM", "BM", "Relationship Handler"]:
        hierarchy_partners[level] = {}
        if level in results:
            kpi_insights = results[level].get("kpi_insights", {})
            
            for kpi, kpi_block in kpi_insights.items():
                # Check concentration block
                conc = kpi_block.get("concentration", {})
                for manager, text in conc.items():
                    if manager not in hierarchy_partners[level]:
                        hierarchy_partners[level][manager] = set()
                    # Extract partner names from formatted text
                    lines = text.splitlines()
                    for line in lines:
                        if line.startswith("- ") and ":" in line:
                            partner_name = line.split(":")[0].replace("- ", "").strip()
                            hierarchy_partners[level][manager].add(partner_name)
    
        # Convert sets to lists
        for manager in hierarchy_partners[level]:
            hierarchy_partners[level][manager] = list(hierarchy_partners[level][manager])
    
    return hierarchy_partners

# Example usage:
top_hierarchy_with_partners = get_top_hierarchy_with_partners(results)
import json
print(json.dumps(top_hierarchy_with_partners, indent=4))


{
    "ZM": {
        "SARFARAZ ABDULLA PATEL": [
            "Haardik Nayak Financial Products Distribution Private Limited",
            "TUSHAR PRAVINBHAI DESAI",
            "JITENDRA HIRALAL SHAH",
            "SURAJ SUBHASH GHODAKE"
        ],
        "HIMANSHU BHARAT MEHTA": [
            "VISHWAS DESHPANDE ALLIANCE PRIVATE LIMITED",
            "SNOWBALL FINANCIAL SERVICES PVT LTD",
            "MARZEE MAIDHYOMAH KERAWALA",
            "SACHIN R SHETTY"
        ],
        "PRASHANT ANANTRAI KAKKAD": [
            "Jyoti Mehndiratta",
            "Nilesh Ratilal Malde",
            "DHARMESH RATILAL SONI",
            "Sahayak Associates",
            "DEEPAK CHANDRA BHATT",
            "Manish Rastogi"
        ]
    },
    "BM": {
        "SARFARAJ YAFAI": [
            "SNOWBALL FINANCIAL SERVICES PVT LTD",
            "FUNDFOX WEALTH PRIVATE LIMITED",
            "VND WEALTH PRIVATE LIMITED",
            "F2 FUNDS PRIVATE LIMITED"
        ],
        "ADNAN DHILAWALA": [
     

In [25]:
required_cols = [
    "Partner Name_x",
    "FY_Year",
    "Equity Sales",
    "SIP Sales_Achievement",
    "Net Sales through MARS",
    "Investment Net Sales Achievement",
    "ZM",
    "BM",
    "Relationship Handler"
]

missing = [c for c in required_cols if c not in final_df.columns]

print("Missing columns:", missing)


Missing columns: []


In [24]:
final_df.head()

Unnamed: 0,Sr No.,Partner Code,Partner Name_x,Center_x,Category_x,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name_x,Doer Type_x,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement_x,ZM,SRM,RM,BM,Partner Name_y,Category_y,Doer Name_y,Doer Type_y,Center_y,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement_y,Status,Total AUM FY 24-25 Q4 YTD,Equity AUM FY 24-25 Q4 YTD,LIVE SIP FY 24-25 Q4 YTD,Total Net Sales\nFY 24-25 Q4 YTD,Equity Net Sales\nFY 24-25 Q4 YTD,Net SIP\nFY 24-25 Q4 YTD,MARS AUM FY 24-25 Q4 YTD,MARS Net Sales FY 24-25 Q4 YTD,PMS AUM FY 24-25 Q4 YTD,Net Sales FY 24-25 Q4 YTD,Clients Acquired FY 24-25 Q4 YTD,Live Accounts (Non-D) FY 24-25 Q4 YTD,Saturday School (YTD) FY 24-25 Q4 YTD,Investment\nSaturday School (YTD) FY 24-25 Q4 YTD,Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD,Total Group FY 24-25 Q4 YTD,Group Covered FY 24-25 Q4 YTD,Total Reviews\nFY 24-25 Q4 YTD,Non-NJ AUM\nFY 24-25 Q4 YTD,Amount FY 24-25 Q4 YTD,Flexicap Target\nFY 24-25 Q4 YTD,Flexicap Ach\nFY 24-25 Q4 YTD,AMC NS Target\nFY 24-25 Q4 YTD,AMC NS Ach\nFY 24-25 Q4 YTD,Target Qty (FY 24-25 Q4 YTD)\n(in Cr),Order Qty\n(FY 24-25 Q4 YTD)\n(in Cr),MARS TO EQ AUM
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023,3,24695,SUBRATA MAITY,Fundz Express,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,4,24040,GOURAB PURKAIT,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,26998.7,1.8,26998.7,0,0,0,0,0,0,0,0,0,0,33750,10499.49,31.11,10499.49,0.0,2023,4,24040,SUBRATA MAITY,Fundz Express,1500000,26998.7,1.8,,,,33750,10499.49,31.11,20,3.0,15.0,3.11,0,0.0,6999.66,2.33,14.59,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,5,25776,RAJ KISHORE BARIK,24 SOUTH PARGANA,NON D,SUBRATA MAITY,600000,496499.92,82.75,1499.92,0,0,0,0,0,0,0,0,0,495000,13500,1499.92,11.11,1499.92,0.0,2023,5,25776,SUBRATA MAITY,Fundz Express,600000,496499.92,82.75,,,,13500,1499.92,11.11,8,3.0,37.5,0.0,0,,0.0,0.0,40.82,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


#### New(Preprocessing)

In [36]:
import os
import sys
import re
import logging
import pandas as pd
from datetime import datetime

# ======================================================
#          LOGGING SETUP
# ======================================================
def setup_logging():
    """Setup logging configuration"""
    if not os.path.exists('logs'):
        os.makedirs('logs')
    
    log_filename = f"logs/data_processor_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
    
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_filename, encoding='utf-8'),
            logging.StreamHandler(sys.stdout)
        ]
    )
    return logging.getLogger("DataProcessor")

logger = setup_logging()


# ======================================================
#          HELPER FUNCTIONS
# ======================================================
def extract_fy_year(filename):
    """Extract FY year from filename automatically"""
    match = re.search(r"FY(\d{2})", filename.upper())
    if match:
        return "20" + match.group(1)
    return None


# ======================================================
#          LOAD FILES FROM FOLDERS
# ======================================================
def load_files_from_subfolders(main_folder):
    """
    Reads all Excel/CSV files from three subfolders:
    - Business number
    - Score card
    - MIS

    Returns dictionary with lists of file paths.
    """

    subfolders = ["Business Numbers", "Scores", "Partner MIS"]
    file_dict = {}

    for sub in subfolders:
        folder_path = os.path.join(main_folder, sub)

        if not os.path.exists(folder_path):
            logger.error(f" Folder does not exist: {folder_path}")
            file_dict[sub] = []
            continue

        logger.info(f" Scanning files from: {folder_path}")

        file_list = []

        for filename in os.listdir(folder_path):
            full_path = os.path.join(folder_path, filename)

            if filename.lower().endswith((".xlsx", ".xls", ".csv")):
                logger.info(f" Found: {filename}")
                file_list.append(full_path)
            else:
                logger.info(f" Skipped (not Excel/CSV): {filename}")

        file_dict[sub] = file_list

    return file_dict


# ======================================================
#          DATA PROCESSOR CLASS
# ======================================================

class DataProcessor:

    def __init__(self):
        self.logger = logging.getLogger("DataProcessor")
        self.logger.info("DataProcessor initialized")

    # --------------------------------------------------
    def load_business_numbers(self, file_paths):
        """Load & preprocess BN files"""
        all_bn = []

        for file in file_paths:
            try:
                self.logger.info(f"Loading Business Numbers: {file}")
                df = pd.read_excel(file)

                df.columns = df.iloc[0]
                df = df[1:].reset_index(drop=True)

                df["FY_Year"] = extract_fy_year(file)

                all_bn.append(df)

            except Exception as e:
                self.logger.error(f"Error in BN file {file}: {e}", exc_info=True)

        return pd.concat(all_bn, ignore_index=True)

    # --------------------------------------------------
    def load_scorecards(self, file_paths):
        """Load & preprocess Scorecard files"""
        all_sc = []

        for file in file_paths:
            try:
                self.logger.info(f"Loading Scorecard: {file}")
                df = pd.read_excel(file)

                df.columns = df.iloc[0]
                df = df[1:].reset_index(drop=True)

                # Fill forward top-level headers
                headers = df.columns.to_list()
                filled = []
                last = None
                for h in headers:
                    if pd.notna(h):
                        last = h
                    filled.append(last)
                df.columns = filled

                # Sub headers
                sub = df.iloc[0].fillna('')
                df = df[1:]

                df.columns = [
                    f"{t}_{s}" if s != "" else t
                    for t, s in zip(df.columns, sub)
                ]

                df["FY_Year"] = extract_fy_year(file)

                all_sc.append(df)

            except Exception as e:
                self.logger.error(f"Error in Scorecard file {file}: {e}", exc_info=True)

        return pd.concat(all_sc, ignore_index=True)

    # --------------------------------------------------
    def load_mis_data(self, file_paths):
        """Load & preprocess MIS files"""
        all_mis = []

        for file in file_paths:
            try:
                self.logger.info(f"Loading MIS: {file}")
                df = pd.read_excel(file)

                df.columns = df.iloc[0]
                df = df[1:].reset_index(drop=True)

                df["FY_Year"] = extract_fy_year(file)

                all_mis.append(df)

            except Exception as e:
                self.logger.error(f"Error in MIS file {file}: {e}", exc_info=True)

        return pd.concat(all_mis, ignore_index=True)

    # --------------------------------------------------
    def merge_all_data(self, bn_df, sc_df, mis_df):
        """Final merging"""
        self.logger.info("Merging BN + SC...")

        merged_bn_sc = pd.merge(
            bn_df,
            sc_df,
            left_on=["Partner Code", "Partner Name", "FY_Year"],
            right_on=["Broker Code", "Partner Name", "FY_Year"],
            how="left",
            suffixes=("", "_DROP")
        )

        drop_cols = [c for c in merged_bn_sc.columns if "_DROP" in c]
        merged_bn_sc.drop(columns=drop_cols, inplace=True)

        self.logger.info("Merging with MIS...")

        final_df = pd.merge(
            merged_bn_sc,
            mis_df,
            on=["Broker Code", "FY_Year"],
            how="left"
        )

        self.logger.info(f"Final dataset shape → {final_df.shape}")
        return final_df


# ======================================================
#                       MAIN
# ======================================================

def main():

    logger.info("=========== START PIPELINE ===========")

    main_folder = r"C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa"   # <-- PUT YOUR REAL PATH HERE

    # Step 1: Auto-load files from folders
    file_dict = load_files_from_subfolders(main_folder)

    # Step 2: Create processor
    processor = DataProcessor()

    # Step 3: Load each section
    bn_df = processor.load_business_numbers(file_dict["Business Numbers"])
    sc_df = processor.load_scorecards(file_dict["Scores"])
    mis_df = processor.load_mis_data(file_dict["Partner MIS"])

    # Step 4: Merge
    final_df = processor.merge_all_data(bn_df, sc_df, mis_df)

    logger.info("=========== PIPELINE FINISHED ===========")
    print("\nFinal dataframe shape:", final_df.shape)

    return final_df


# Run script
if __name__ == "__main__":
    main()

2025-11-18 10:55:21,311 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Business Numbers
2025-11-18 10:55:21,311 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Business Numbers.xls
2025-11-18 10:55:21,311 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Business Numbers.xls
2025-11-18 10:55:21,311 - DataProcessor - INFO -  Found: FY25 - YTD - Partner Score Card - Business Numbers.xls
2025-11-18 10:55:21,318 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Scores
2025-11-18 10:55:21,320 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Scores.xls
2025-11-18 10:55:21,320 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Scores.xls
2025-11-18 10:55:21,320 - DataProcessor - INFO -  Found: FY25 - YTD - Partner Score Card - Scores.xls
2025-11-18 10:55:21,324 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQ

In [38]:
def call_esha():
    return "call esha"

In [39]:
call_esha()

'call esha'

In [40]:
xx = call_esha()

In [41]:
xx

'call esha'

In [42]:
result = main()

2025-11-18 11:05:25,994 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Business Numbers
2025-11-18 11:05:25,997 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Business Numbers.xls
2025-11-18 11:05:25,998 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Business Numbers.xls
2025-11-18 11:05:25,999 - DataProcessor - INFO -  Found: FY25 - YTD - Partner Score Card - Business Numbers.xls
2025-11-18 11:05:25,999 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQUARE\Desktop\Esha\NJ Insights\Data\Dataa\Scores
2025-11-18 11:05:26,001 - DataProcessor - INFO -  Found: FY23 - YTD - Partner Score Card - Scores.xls
2025-11-18 11:05:26,001 - DataProcessor - INFO -  Found: FY24 - YTD - Partner Score Card - Scores.xls
2025-11-18 11:05:26,003 - DataProcessor - INFO -  Found: FY25 - YTD - Partner Score Card - Scores.xls
2025-11-18 11:05:26,004 - DataProcessor - INFO -  Scanning files from: C:\Users\G-SQ

In [45]:
result.columns

Index(['Sr No.', 'Partner Code', 'Partner Name_x', 'Center_x', 'Category_x',
       'Relationship Handler', 'Investment Net Sales Target',
       'Investment Net Sales Achievement',
       'Investment Net Sales % Achievement', 'Equity Sales',
       ...
       'Total Reviews\nFY 24-25 Q4 YTD', 'Non-NJ AUM\nFY 24-25 Q4 YTD',
       'Amount FY 24-25 Q4 YTD', 'Flexicap Target\nFY 24-25 Q4 YTD',
       'Flexicap Ach\nFY 24-25 Q4 YTD', 'AMC NS Target\nFY 24-25 Q4 YTD',
       'AMC NS Ach\nFY 24-25 Q4 YTD', 'Target Qty (FY 24-25 Q4 YTD)\n(in Cr)',
       'Order Qty\n(FY 24-25 Q4 YTD)\n(in Cr)', 'MARS TO EQ AUM'],
      dtype='object', length=120)

In [300]:
cfg.google_api_key

'AIzaSyBGIQOXNlkmsuvXbL-8m708f5bmD6vKiKc'

In [304]:
import pandas as pd
import numpy as np
import config as cfg
import google.generativeai as genai
from typing import List, Dict, Tuple, Optional
import logging
from dataclasses import dataclass

# ================= CONFIGURATION =================
@dataclass
class InsightConfig:
    """Configuration for insight generation"""
    kpi_list: List[str] = None
    hierarchy_levels: List[str] = None
    partner_column: str = 'Partner Name_x'
    partner_code_column: str = 'Partner Code'
    broker_code_column: str = 'Broker Code'
    relationship_handler_column: str = 'Relationship Handler'
    thresholds: List[int] = None
    top_managers: int = 3
    top_partners: int = 2
    change_threshold: float = 50.0
    year_column: str = 'FY_Year_x'
    
    def __post_init__(self):
        if self.kpi_list is None:
            self.kpi_list = [
                'Equity Sales',
                'SIP Sales_Achievement', 
                'Net Sales through MARS',
                'Investment Net Sales Achievement'
            ]
        if self.hierarchy_levels is None:
            self.hierarchy_levels = ['ZM', 'BM', 'Relationship Handler']  # ZM first
        if self.thresholds is None:
            self.thresholds = [25, 50, 75, 80, 90]

# ================= GEMINI SERVICE =================
class GeminiService:
    """Handles all Gemini LLM interactions"""
    def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash"):
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)
        self.logger = logging.getLogger(__name__)
    
    def generate_insight(self, prompt: str, temperature: float = 0.7) -> str:
        """Generate insight using Gemini model"""
        try:
            response = self.model.generate_content(
                prompt,
                generation_config={
                    "temperature": temperature,
                    "top_p": 0.9,
                    "max_output_tokens": 800
                }
            )
            return response.text.strip()
        except Exception as e:
            self.logger.error(f"Gemini API error: {e}")
            return f"Error generating insight: {e}"

# ================= BASE INSIGHT GENERATOR =================
class InsightGenerator:
    """Base class for all insight generators"""
    def __init__(self, gemini_service: GeminiService, config: InsightConfig):
        self.gemini = gemini_service
        self.config = config
        self.logger = logging.getLogger(__name__)
    
    def validate_dataframe(self, df: pd.DataFrame, required_columns: List[str]) -> bool:
        """Validate that required columns exist in dataframe"""
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            self.logger.warning(f"Missing columns in dataframe: {missing_columns}")
            return False
        return True
    
    def get_top_managers(self, df: pd.DataFrame, kpi: str, hierarchy_level: str) -> List[str]:
        """Get top managers by KPI for given hierarchy level"""
        if not self.validate_dataframe(df, [hierarchy_level, kpi]):
            return []
            
        # Filter out NaN values in hierarchy level
        df_clean = df[df[hierarchy_level].notna()]
        
        if df_clean.empty:
            return []
        
        return (
            df_clean.groupby(hierarchy_level)[kpi]
            .sum()
            .sort_values(ascending=False)
            .head(self.config.top_managers)
            .index
            .tolist()
        )

# ================= PARTNER CONCENTRATION =================
class ConcentrationInsightGenerator(InsightGenerator):
    """Generates partner concentration insights"""
    
    def calculate_concentration(self, df: pd.DataFrame, kpi: str) -> Tuple[Dict, int, pd.DataFrame]:
        """Calculate concentration metrics for a group"""
        required_cols = [self.config.partner_column, kpi]
        if not self.validate_dataframe(df, required_cols):
            return {}, 0, pd.DataFrame()
            
        try:
            # Filter out partners with no name
            df_clean = df[df[self.config.partner_column].notna()]
            
            if df_clean.empty:
                return {}, 0, pd.DataFrame()
            
            df_sorted = (
                df_clean.groupby(self.config.partner_column)[kpi]
                .sum()
                .sort_values(ascending=False)
                .reset_index()
            )

            total_kpi = df_sorted[kpi].sum()
            if total_kpi == 0:
                return {}, len(df_sorted), df_sorted
                
            df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
            df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100

            total_partners = len(df_sorted)
            conc_summary = {}

            for threshold in self.config.thresholds:
                try:
                    num_partners = (df_sorted['Cumulative KPI %'] >= threshold).idxmax() + 1
                    conc_summary[f'Partners for {threshold}%'] = num_partners
                    conc_summary[f'% of Total Partners for {threshold}%'] = round(
                        num_partners / total_partners * 100, 2
                    )
                except (ValueError, IndexError):
                    conc_summary[f'Partners for {threshold}%'] = total_partners
                    conc_summary[f'% of Total Partners for {threshold}%'] = 100.0

            return conc_summary, total_partners, df_sorted
            
        except Exception as e:
            self.logger.error(f"Error calculating concentration: {e}")
            return {}, 0, pd.DataFrame()
    
    def generate_manager_concentration(self, df: pd.DataFrame, kpi: str, hierarchy_level: str) -> Dict:
        """Generate concentration insights for all managers at given hierarchy level"""
        required_cols = [hierarchy_level, self.config.partner_column, kpi]
        if not self.validate_dataframe(df, required_cols):
            self.logger.warning(f"Missing required columns for concentration analysis: {required_cols}")
            return {}
            
        manager_insights = {}
        top_managers = self.get_top_managers(df, kpi, hierarchy_level)
        
        if not top_managers:
            self.logger.warning(f"No managers found for {hierarchy_level} level with KPI {kpi}")
            return {}
        
        for manager in top_managers:
            try:
                manager_data = df[df[hierarchy_level] == manager]
                if manager_data.empty:
                    continue
                    
                conc_summary, total_partners, df_sorted = self.calculate_concentration(manager_data, kpi)
                
                if not conc_summary:
                    continue
                    
                # Create concentration text
                insight_text = self._format_concentration_text(
                    manager, kpi, hierarchy_level, total_partners, conc_summary, df_sorted
                )
                
                # Generate LLM insights
                llm_insight = self._generate_concentration_llm_insight(
                    manager, kpi, hierarchy_level, insight_text
                )
                
                manager_insights[manager] = insight_text + "\n\nLLM Insights:\n" + llm_insight
                
            except Exception as e:
                self.logger.error(f"Error processing manager {manager} for concentration: {e}")
                continue
            
        return manager_insights
    
    def _format_concentration_text(self, manager: str, kpi: str, level: str, 
                                 total_partners: int, conc_summary: Dict, df_sorted: pd.DataFrame) -> str:
        """Format concentration analysis into readable text"""
        text = f"{level}: {manager}\nKPI: {kpi}\n"
        text += f"Total Partners under {manager}: {total_partners}\n\n"
        text += f"Partner Concentration Insight for KPI '{kpi}':\n"
        
        for threshold in self.config.thresholds:
            partners_key = f'Partners for {threshold}%'
            percent_key = f'% of Total Partners for {threshold}%'
            
            if partners_key in conc_summary:
                text += (
                    f" - Top {conc_summary[partners_key]} partners "
                    f"({conc_summary[percent_key]}% of total partners) "
                    f"contribute {threshold}% of total {kpi}.\n"
                )
        
        # Add top partners for context
        if not df_sorted.empty:
            top_partners_data = df_sorted.head(self.config.top_partners)[[self.config.partner_column, kpi, 'Cumulative KPI %']]
            text += f"\nTop {self.config.top_partners} Partners (for reference):\n"
            for _, row in top_partners_data.iterrows():
                partner_name = row[self.config.partner_column]
                text += f" - {partner_name}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}% cumulative)\n"
            
        return text
    
    def _generate_concentration_llm_insight(self, manager: str, kpi: str, level: str, text: str) -> str:
        """Generate LLM insights for concentration analysis"""
        prompt = f"""
        You are a financial performance analyst.
        Below is data for {level} '{manager}' on KPI '{kpi}' showing partner concentration.
        Analyze how concentrated performance is (few vs many partners contributing).
        Write 3–4 crisp insights about:
        1. Level of concentration (high/medium/low),
        2. Implications on business dependency,
        3. Partner development or risk recommendations.

        Data:
        {text}
        """
        return self.gemini.generate_insight(prompt)

# ================= LEADERS & LAGGERS =================
class LeadersLaggersInsightGenerator(InsightGenerator):
    """Generates leaders and laggers insights"""
    
    def generate_manager_leaders_laggers(self, df: pd.DataFrame, kpi: str, hierarchy_level: str) -> Dict:
        """Generate leaders/laggers insights for all managers at given hierarchy level"""
        required_cols = [hierarchy_level, self.config.partner_column, kpi]
        if not self.validate_dataframe(df, required_cols):
            self.logger.warning(f"Missing required columns for leaders/laggers analysis: {required_cols}")
            return {}
            
        manager_insights = {}
        top_managers = self.get_top_managers(df, kpi, hierarchy_level)
        
        if not top_managers:
            self.logger.warning(f"No managers found for {hierarchy_level} level with KPI {kpi}")
            return {}
        
        for manager in top_managers:
            try:
                manager_data = df[df[hierarchy_level] == manager]
                if manager_data.empty:
                    continue
                    
                insight_text = self._analyze_leaders_laggers(manager, kpi, hierarchy_level, manager_data)
                
                # Generate LLM insights
                llm_insight = self._generate_leaders_laggers_llm_insight(
                    manager, kpi, hierarchy_level, insight_text
                )
                
                manager_insights[manager] = insight_text + "\n\nLLM Insights:\n" + llm_insight
                
            except Exception as e:
                self.logger.error(f"Error processing manager {manager} for leaders/laggers: {e}")
                continue
            
        return manager_insights
    
    def _analyze_leaders_laggers(self, manager: str, kpi: str, level: str, df: pd.DataFrame) -> str:
        """Analyze leaders and laggers for a manager"""
        try:
            # Filter out partners with no name
            df_clean = df[df[self.config.partner_column].notna()]
            
            if df_clean.empty:
                return f"{level}: {manager}\nKPI: {kpi}\n\nNo partner data available."
            
            # Sort partners by performance
            perf = (
                df_clean.groupby(self.config.partner_column)[kpi]
                .sum()
                .sort_values(ascending=False)
                .reset_index()
            )

            if perf.empty:
                return f"{level}: {manager}\nKPI: {kpi}\n\nNo partner data available."

            total_perf = perf[kpi].sum()
            if total_perf == 0:
                return f"{level}: {manager}\nKPI: {kpi}\n\nNo performance data available."

            # Leaders (Top N)
            leaders = perf.head(self.config.top_partners).copy()
            leaders['% Share'] = round(leaders[kpi] / total_perf * 100, 2)

            # Laggers (Bottom N)
            laggers = perf.tail(self.config.top_partners).copy()
            laggers['% Share'] = round(laggers[kpi] / total_perf * 100, 2)

            # Performance summary
            leaders_contrib = leaders['% Share'].sum()
            laggers_contrib = laggers['% Share'].sum()

            text = f"{level}: {manager}\nKPI: {kpi}\n\n"
            text += f"Total Partners under {manager}: {len(perf)}\n"
            text += f"Top {self.config.top_partners} Leaders contribute: {leaders_contrib:.2f}% of total {kpi}\n"
            text += f"Bottom {self.config.top_partners} Laggers contribute: {laggers_contrib:.2f}% of total {kpi}\n\n"

            text += "Top Performing (Leader) Partners:\n"
            for _, row in leaders.iterrows():
                partner_name = row[self.config.partner_column]
                text += f" - {partner_name}: {row[kpi]:,.2f} ({row['% Share']}%)\n"

            text += "\nLow Performing (Lagger) Partners:\n"
            for _, row in laggers.iterrows():
                partner_name = row[self.config.partner_column]
                text += f" - {partner_name}: {row[kpi]:,.2f} ({row['% Share']}%)\n"

            return text
            
        except Exception as e:
            self.logger.error(f"Error analyzing leaders/laggers for {manager}: {e}")
            return f"{level}: {manager}\nKPI: {kpi}\n\nError analyzing data: {e}"
    
    def _generate_leaders_laggers_llm_insight(self, manager: str, kpi: str, level: str, text: str) -> str:
        """Generate LLM insights for leaders/laggers analysis"""
        prompt = f"""
        You are a financial performance analyst.
        Below is data for {level} '{manager}' on KPI '{kpi}' showing top and bottom performing partners.

        Write a professional 3–4 bullet point insight covering:
        1. Performance dependency (e.g., dominated by few top partners or evenly spread),
        2. Impact of leaders on total performance,
        3. Weak link from laggers and improvement recommendations,
        4. Any early warning or strategic focus points.

        Data:
        {text}
        """
        return self.gemini.generate_insight(prompt)

# ================= DRASTIC CHANGES =================
class DrasticChangeInsightGenerator(InsightGenerator):
    """Generates insights for drastic performance changes"""
    
    def detect_drastic_changes(self, df: pd.DataFrame) -> pd.DataFrame:
        """Detect YoY KPI changes above threshold"""
        required_cols = [self.config.partner_column, self.config.year_column] + self.config.kpi_list
        if not self.validate_dataframe(df, required_cols):
            self.logger.warning(f"Missing required columns for drastic changes: {required_cols}")
            return pd.DataFrame()
            
        try:
            df_clean = df[df[self.config.partner_column].notna() & df[self.config.year_column].notna()]
            df_sorted = df_clean.sort_values([self.config.partner_column, self.config.year_column])
            change_records = []

            for partner, group in df_sorted.groupby([self.config.partner_column]):
                group = group.sort_values(self.config.year_column)
                for i in range(1, len(group)):
                    prev, curr = group.iloc[i - 1], group.iloc[i]

                    for kpi in self.config.kpi_list:
                        prev_val, curr_val = prev[kpi], curr[kpi]
                        if pd.isna(prev_val) or pd.isna(curr_val) or prev_val == 0:
                            continue
                        
                        pct_change = ((curr_val - prev_val) / abs(prev_val)) * 100

                        if abs(pct_change) >= self.config.change_threshold:
                            change_records.append({
                                'Partner Name': partner,
                                'KPI': kpi,
                                'Year From': prev[self.config.year_column],
                                'Year To': curr[self.config.year_column],
                                'Previous Value': round(prev_val, 2),
                                'Current Value': round(curr_val, 2),
                                '% Change': round(pct_change, 2)
                            })

            return pd.DataFrame(change_records)
            
        except Exception as e:
            self.logger.error(f"Error detecting drastic changes: {e}")
            return pd.DataFrame()
    
    def generate_change_insights(self, df: pd.DataFrame) -> Dict:
        """Generate insights for drastic changes"""
        change_df = self.detect_drastic_changes(df)
        
        if change_df.empty:
            return {"All": "No significant performance changes detected above threshold."}

        try:
            # Get top partners with most changes
            top_partners = (
                change_df.groupby('Partner Name')['% Change']
                .apply(lambda x: x.abs().mean())
                .sort_values(ascending=False)
                .head(self.config.top_partners)
                .index
            )

            partner_insights = {}
            for partner in top_partners:
                partner_data = change_df[change_df['Partner Name'] == partner]
                insight_text = self._generate_partner_change_insight(partner, partner_data)
                partner_insights[partner] = insight_text
                
            return partner_insights
            
        except Exception as e:
            self.logger.error(f"Error generating change insights: {e}")
            return {"Error": f"Failed to generate change insights: {e}"}
    
    def _generate_partner_change_insight(self, partner: str, partner_data: pd.DataFrame) -> str:
        """Generate change insights for a specific partner"""
        text = f"\n📊 Partner: {partner}\n"

        try:
            prompt = f"""
You are a senior financial analyst.
Analyze the KPI performance changes for partner '{partner}'.

Below is year-over-year data showing KPI shifts beyond ±{self.config.change_threshold}%:
{partner_data}

(3–4 bullet points) that:
1. Identifies which KPIs show the largest positive or negative shifts, with actual % changes.
2. Interprets what these changes reveal about the partner's business direction.
3. Explains the potential impact on overall business performance.
4. Provides 1–2 quantified, strategic recommendations.

Tone: Data-driven, concise, and suitable for a sales performance dashboard.
Use ↑ and ↓ symbols for directionality.
Avoid generic phrasing; use the numbers provided.
"""
            llm_insight = self.gemini.generate_insight(prompt, temperature=0.6)
            text += llm_insight + "\n" + ("-" * 100) + "\n"

            return text
            
        except Exception as e:
            self.logger.error(f"Error generating change insight for partner {partner}: {e}")
            return f"\n Partner: {partner}\nError generating insights: {e}"

# ================= FOCUS AREAS =================
class FocusAreaInsightGenerator(InsightGenerator):
    """Generates focus area insights"""
    
    def identify_focus_areas(self, df: pd.DataFrame) -> pd.DataFrame:
        """Identify performance gaps vs targets or averages"""
        required_cols = [self.config.partner_column] + self.config.kpi_list
        if not self.validate_dataframe(df, required_cols):
            self.logger.warning(f"Missing required columns for focus areas: {required_cols}")
            return pd.DataFrame()
            
        try:
            focus_records = []
            df_clean = df[df[self.config.partner_column].notna()]
            kpi_avg = df_clean[self.config.kpi_list].mean()

            for partner, group in df_clean.groupby([self.config.partner_column]):
                partner_row = group.iloc[-1]  # Latest data
                gaps = {}

                for kpi in self.config.kpi_list:
                    # Look for target columns
                    possible_targets = [col for col in df.columns if 'Target' in col and kpi in col]
                    target_value = partner_row[possible_targets[0]] if possible_targets else kpi_avg[kpi]

                    if target_value and not pd.isna(target_value) and target_value != 0:
                        gap_pct = ((target_value - partner_row[kpi]) / target_value) * 100
                    else:
                        gap_pct = 0

                    gaps[kpi] = round(gap_pct, 2)

                max_gap = max(gaps.values()) if gaps else 0
                sorted_gaps = sorted(gaps.items(), key=lambda x: x[1], reverse=True)

                focus_records.append({
                    'Partner Name': partner,
                    'Focus KPIs': sorted_gaps,
                    'Max Gap': max_gap
                })

            return pd.DataFrame(focus_records)
            
        except Exception as e:
            self.logger.error(f"Error identifying focus areas: {e}")
            return pd.DataFrame()
    
    def generate_focus_insights(self, df: pd.DataFrame) -> Dict:
        """Generate focus area insights"""
        focus_df = self.identify_focus_areas(df)
        
        if focus_df.empty:
            return {"All": "No focus areas identified."}

        try:
            # Get top partners with largest gaps
            top_partners = (
                focus_df.sort_values('Max Gap', ascending=False)
                .head(self.config.top_partners)
            )

            partner_insights = {}
            for _, row in top_partners.iterrows():
                partner = row['Partner Name']
                insight_text = self._generate_partner_focus_insight(partner, row['Focus KPIs'])
                partner_insights[partner] = insight_text
                
            return partner_insights
            
        except Exception as e:
            self.logger.error(f"Error generating focus insights: {e}")
            return {"Error": f"Failed to generate focus insights: {e}"}
    
    def _generate_partner_focus_insight(self, partner: str, focus_kpis: List) -> str:
        """Generate focus insights for a specific partner"""
        text = f"\n🎯 Partner: {partner}\n"

        try:
            prompt = f"""
You are a senior business analyst.
Analyze the performance gaps for partner '{partner}'.

Below are the KPIs and their % gaps (higher = more underperformance):
{focus_kpis}

Your task:
- Focus only on quantitative insights — do not generate generic text.
- Use the % gaps to suggest realistic improvement targets (X%) based on the data.
- Estimate how much total performance (Y%) could improve if this partner closes these gaps partially or fully.

Generate exactly 3–4 concise insights:
1. Identify the top 2–3 KPIs with highest % gaps.
2. Suggest realistic % improvement targets (X%) and expected performance gains.
3. Estimate overall improvement (Y%) if achieved.
4. End with one actionable recommendation.
"""
            llm_insight = self.gemini.generate_insight(prompt, temperature=0.7)
            text += llm_insight + "\n" + ("-" * 100) + "\n"

            return text
            
        except Exception as e:
            self.logger.error(f"Error generating focus insight for partner {partner}: {e}")
            return f"\n Partner: {partner}\nError generating insights: {e}"
            
# ================= MAIN INSIGHT ORCHESTRATOR =================
class InsightOrchestrator:
    """Orchestrates all insight generation across hierarchy levels"""
    
    def __init__(self, gemini_api_key: str, config: InsightConfig = None):
        self.config = config or InsightConfig()
        self.gemini_service = GeminiService(gemini_api_key)
        
        # Initialize all generators
        self.concentration_gen = ConcentrationInsightGenerator(self.gemini_service, self.config)
        self.leaders_laggers_gen = LeadersLaggersInsightGenerator(self.gemini_service, self.config)
        self.drastic_change_gen = DrasticChangeInsightGenerator(self.gemini_service, self.config)
        self.focus_area_gen = FocusAreaInsightGenerator(self.gemini_service, self.config)
        
        self.logger = logging.getLogger(__name__)
    
    def validate_data_requirements(self, df: pd.DataFrame) -> bool:
        """Validate that all required columns exist in the dataframe"""
        required_columns = [self.config.partner_column, self.config.year_column] + self.config.kpi_list
        missing_columns = [col for col in required_columns if col not in df.columns]
        
        if missing_columns:
            self.logger.error(f"Missing required columns: {missing_columns}")
            self.logger.info(f"Available columns: {list(df.columns)}")
            return False
            
        return True
    
    def generate_all_insights(self, df: pd.DataFrame) -> Dict:
        """Generate all types of insights across all hierarchy levels"""
        if not self.validate_data_requirements(df):
            return {"error": "Data validation failed - check required columns"}
            
        all_insights = {}
        
        # Process hierarchy levels in the order specified (ZM first)
        for hierarchy_level in self.config.hierarchy_levels:
            if hierarchy_level not in df.columns:
                self.logger.warning(f"Hierarchy level {hierarchy_level} not found in dataframe. Available: {list(df.columns)}")
                continue
                
            level_insights = {}
            
            # Generate insights for each KPI at this hierarchy level
            kpi_insights = {}
            for kpi in self.config.kpi_list:
                if kpi not in df.columns:
                    self.logger.warning(f"KPI {kpi} not found in dataframe")
                    continue
                    
                # 1. Partner Concentration
                try:
                    concentration_insights = self.concentration_gen.generate_manager_concentration(
                        df, kpi, hierarchy_level
                    )
                    if concentration_insights:
                        kpi_insights[kpi] = {
                            'concentration': concentration_insights
                        }
                except Exception as e:
                    self.logger.error(f"Concentration insight error for {kpi}: {e}")
                
                # 2. Leaders & Laggers
                try:
                    leaders_laggers_insights = self.leaders_laggers_gen.generate_manager_leaders_laggers(
                        df, kpi, hierarchy_level
                    )
                    if leaders_laggers_insights:
                        if kpi not in kpi_insights:
                            kpi_insights[kpi] = {}
                        kpi_insights[kpi]['leaders_laggers'] = leaders_laggers_insights
                except Exception as e:
                    self.logger.error(f"Leaders/Laggers insight error for {kpi}: {e}")
            
            if kpi_insights:
                level_insights['kpi_insights'] = kpi_insights
            
            # 3. Drastic Changes (partner level) - only for BM level
            if hierarchy_level == 'BM':
                try:
                    drastic_changes = self.drastic_change_gen.generate_change_insights(df)
                    if drastic_changes:
                        level_insights['drastic_changes'] = drastic_changes
                except Exception as e:
                    self.logger.error(f"Drastic change insight error: {e}")
            
            # 4. Focus Areas (partner level) - only for BM level
            if hierarchy_level == 'BM':
                try:
                    focus_areas = self.focus_area_gen.generate_focus_insights(df)
                    if focus_areas:
                        level_insights['focus_areas'] = focus_areas
                except Exception as e:
                    self.logger.error(f"Focus area insight error: {e}")
            
            if level_insights:
                all_insights[hierarchy_level] = level_insights
            
        return all_insights
    
    def display_insights(self, insights: Dict):
        """Display insights in formatted output"""
        if 'error' in insights:
            print(f"ERROR: {insights['error']}")
            return
            
        # Display insights in the order of hierarchy levels (ZM first)
        for hierarchy_level in self.config.hierarchy_levels:
            if hierarchy_level not in insights:
                continue
                
            level_insights = insights[hierarchy_level]
            print(f"\n{'='*80}")
            print(f"INSIGHTS FOR {hierarchy_level} LEVEL")
            print(f"{'='*80}")
            
            # Display KPI-based insights
            if 'kpi_insights' in level_insights:
                for kpi, insight_types in level_insights['kpi_insights'].items():
                    print(f"\n--- {kpi} ---")
                    for insight_type, manager_insights in insight_types.items():
                        print(f"\n{insight_type.upper()} INSIGHTS:")
                        if isinstance(manager_insights, dict):
                            for manager, insight in manager_insights.items():
                                print(f"\n{manager}:\n{insight}")
                                print("-" * 60)
                        else:
                            print(f"\n{manager_insights}")
            
            # Display other insights
            for insight_type in ['drastic_changes', 'focus_areas']:
                if insight_type in level_insights:
                    print(f"\n{insight_type.upper()} INSIGHTS:")
                    insights_data = level_insights[insight_type]
                    if isinstance(insights_data, dict):
                        for entity, insight in insights_data.items():
                            print(f"\n{entity}:\n{insight}")
                            print("-" * 60)
                    else:
                        print(f"\n{insights_data}")

# ================= USAGE EXAMPLE =================
def main():
    """Example usage of the insight generation system"""
    
    # Configuration matching your actual column names - ZM first
    insight_config = InsightConfig(
        kpi_list=[
            'Equity Sales',
            'SIP Sales_Achievement',
            'Net Sales through MARS',
            'Investment Net Sales Achievement'
        ],
        hierarchy_levels=['ZM', 'BM', 'Relationship Handler'],  # ZM first
        partner_column='Partner Name_x',
        year_column='FY_Year_x',
        top_managers=3,
        top_partners=2,
        change_threshold=50.0
    )
    
    # Initialize orchestrator
    orchestrator = InsightOrchestrator(
        gemini_api_key= 'AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0',
        config=insight_config
    )
    
    # Generate all insights
    all_insights = orchestrator.generate_all_insights(merged_with_hierarchyy)
    
    # Display insights
    orchestrator.display_insights(all_insights)
    
    return all_insights

if __name__ == "__main__":
    # Setup logging
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    
    # Run insight generation
    insights = main()


INSIGHTS FOR ZM LEVEL

--- Equity Sales ---

CONCENTRATION INSIGHTS:

SARFARAZ ABDULLA PATEL:
ZM: SARFARAZ ABDULLA PATEL
KPI: Equity Sales
Total Partners under SARFARAZ ABDULLA PATEL: 8842

Partner Concentration Insight for KPI 'Equity Sales':
 - Top 103 partners (1.16% of total partners) contribute 25% of total Equity Sales.
 - Top 398 partners (4.5% of total partners) contribute 50% of total Equity Sales.
 - Top 1046 partners (11.83% of total partners) contribute 75% of total Equity Sales.
 - Top 1260 partners (14.25% of total partners) contribute 80% of total Equity Sales.
 - Top 1850 partners (20.92% of total partners) contribute 90% of total Equity Sales.

Top 2 Partners (for reference):
 - JITENDRA HIRALAL SHAH: 1,371,763,846.24 (1.04% cumulative)
 - Mass Investments: 1,024,503,289.22 (1.82% cumulative)


LLM Insights:
Here are some insights on Sarfaraz Abdulla Patel's Equity Sales performance based on partner concentration:

1.  **High Concentration:** The Equity Sales are high

#### new

In [310]:
import pandas as pd
import numpy as np
import config as cfg
import google.generativeai as genai
from typing import List, Dict, Tuple, Optional
import logging
from dataclasses import dataclass

# ================= CONFIGURATION =================
@dataclass
class InsightConfig:
    """Configuration for insight generation"""
    kpi_list: List[str] = None
    hierarchy_levels: List[str] = None
    partner_column: str = 'Partner Name_x'
    partner_code_column: str = 'Partner Code'
    broker_code_column: str = 'Broker Code'
    relationship_handler_column: str = 'Relationship Handler'
    thresholds: List[int] = None
    top_managers: int = 3
    top_partners: int = 2
    change_threshold: float = 50.0
    year_column: str = 'FY_Year_x'
    
    def __post_init__(self):
        if self.kpi_list is None:
            self.kpi_list = [
                'Equity Sales',
                'SIP Sales_Achievement', 
                'Net Sales through MARS',
                'Investment Net Sales Achievement'
            ]
        if self.hierarchy_levels is None:
            self.hierarchy_levels = ['ZM', 'BM', 'Relationship Handler'] 
        if self.thresholds is None:
            self.thresholds = [25, 50, 75, 80, 90]

# ================= GEMINI SERVICE =================
class GeminiService:
    """Handles all Gemini LLM interactions"""
    def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash"):
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)
        self.logger = logging.getLogger(__name__)
    
    def generate_insight(self, prompt: str, temperature: float = 0.7) -> str:
        """Generate insight using Gemini model"""
        try:
            response = self.model.generate_content(
                prompt,
                generation_config={
                    "temperature": temperature,
                    "top_p": 0.9,
                    "max_output_tokens": 800
                }
            )
            return response.text.strip()
        except Exception as e:
            self.logger.error(f"Gemini API error: {e}")
            return f"Error generating insight: {e}"

# ================= BASE INSIGHT GENERATOR =================
class InsightGenerator:
    """Base class for all insight generators"""
    def __init__(self, gemini_service: GeminiService, config: InsightConfig):
        self.gemini = gemini_service
        self.config = config
        self.logger = logging.getLogger(__name__)
    
    def validate_dataframe(self, df: pd.DataFrame, required_columns: List[str]) -> bool:
        """Validate that required columns exist in dataframe"""
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            self.logger.warning(f"Missing columns in dataframe: {missing_columns}")
            return False
        return True
    
    def get_top_managers(self, df: pd.DataFrame, kpi: str, hierarchy_level: str) -> List[str]:
        """Get top managers by KPI for given hierarchy level"""
        if not self.validate_dataframe(df, [hierarchy_level, kpi]):
            return []
            
        # Filter out NaN values in hierarchy level
        df_clean = df[df[hierarchy_level].notna()]
        
        if df_clean.empty:
            return []
        
        return (
            df_clean.groupby(hierarchy_level)[kpi]
            .sum()
            .sort_values(ascending=False)
            .head(self.config.top_managers)
            .index
            .tolist()
        )

# ================= PARTNER CONCENTRATION =================
class ConcentrationInsightGenerator(InsightGenerator):
    """Generates partner concentration insights"""
    
    def calculate_concentration(self, df: pd.DataFrame, kpi: str) -> Tuple[Dict, int, pd.DataFrame]:
        """Calculate concentration metrics for a group"""
        required_cols = [self.config.partner_column, kpi]
        if not self.validate_dataframe(df, required_cols):
            return {}, 0, pd.DataFrame()
            
        try:
            # Filter out partners with no name
            df_clean = df[df[self.config.partner_column].notna()]
            
            if df_clean.empty:
                return {}, 0, pd.DataFrame()
            
            df_sorted = (
                df_clean.groupby(self.config.partner_column)[kpi]
                .sum()
                .sort_values(ascending=False)
                .reset_index()
            )

            total_kpi = df_sorted[kpi].sum()
            if total_kpi == 0:
                return {}, len(df_sorted), df_sorted
                
            df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
            df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100

            total_partners = len(df_sorted)
            conc_summary = {}

            for threshold in self.config.thresholds:
                try:
                    num_partners = (df_sorted['Cumulative KPI %'] >= threshold).idxmax() + 1
                    conc_summary[f'Partners for {threshold}%'] = num_partners
                    conc_summary[f'% of Total Partners for {threshold}%'] = round(
                        num_partners / total_partners * 100, 2
                    )
                except (ValueError, IndexError):
                    conc_summary[f'Partners for {threshold}%'] = total_partners
                    conc_summary[f'% of Total Partners for {threshold}%'] = 100.0

            return conc_summary, total_partners, df_sorted
            
        except Exception as e:
            self.logger.error(f"Error calculating concentration: {e}")
            return {}, 0, pd.DataFrame()
    
    def generate_manager_concentration(self, df: pd.DataFrame, kpi: str, hierarchy_level: str) -> Dict:
        """Generate concentration insights for all managers at given hierarchy level"""
        required_cols = [hierarchy_level, self.config.partner_column, kpi]
        if not self.validate_dataframe(df, required_cols):
            self.logger.warning(f"Missing required columns for concentration analysis: {required_cols}")
            return {}
            
        manager_insights = {}
        top_managers = self.get_top_managers(df, kpi, hierarchy_level)
        
        if not top_managers:
            self.logger.warning(f"No managers found for {hierarchy_level} level with KPI {kpi}")
            return {}
        
        for manager in top_managers:
            try:
                manager_data = df[df[hierarchy_level] == manager]
                if manager_data.empty:
                    continue
                    
                conc_summary, total_partners, df_sorted = self.calculate_concentration(manager_data, kpi)
                
                if not conc_summary:
                    continue
                    
                # Create concentration text
                insight_text = self._format_concentration_text(
                    manager, kpi, hierarchy_level, total_partners, conc_summary, df_sorted
                )
                
                # Generate LLM insights
                llm_insight = self._generate_concentration_llm_insight(
                    manager, kpi, hierarchy_level, insight_text
                )
                
                manager_insights[manager] = insight_text + "\n\nLLM Insights:\n" + llm_insight
                
            except Exception as e:
                self.logger.error(f"Error processing manager {manager} for concentration: {e}")
                continue
            
        return manager_insights
    
    def _format_concentration_text(self, manager: str, kpi: str, level: str, 
                                 total_partners: int, conc_summary: Dict, df_sorted: pd.DataFrame) -> str:
        """Format concentration analysis into readable text"""
        text = f"{level}: {manager}\nKPI: {kpi}\n"
        text += f"Total Partners under {manager}: {total_partners}\n\n"
        text += f"Partner Concentration Insight for KPI '{kpi}':\n"
        
        for threshold in self.config.thresholds:
            partners_key = f'Partners for {threshold}%'
            percent_key = f'% of Total Partners for {threshold}%'
            
            if partners_key in conc_summary:
                text += (
                    f" - Top {conc_summary[partners_key]} partners "
                    f"({conc_summary[percent_key]}% of total partners) "
                    f"contribute {threshold}% of total {kpi}.\n"
                )
        
        # Add top partners for context
        if not df_sorted.empty:
            top_partners_data = df_sorted.head(self.config.top_partners)[[self.config.partner_column, kpi, 'Cumulative KPI %']]
            text += f"\nTop {self.config.top_partners} Partners (for reference):\n"
            for _, row in top_partners_data.iterrows():
                partner_name = row[self.config.partner_column]
                text += f" - {partner_name}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}% cumulative)\n"
            
        return text
    
    def _generate_concentration_llm_insight(self, manager: str, kpi: str, level: str, text: str) -> str:
        """Generate LLM insights for concentration analysis"""
        prompt = f"""
        You are a financial performance analyst.
        Below is data for {level} '{manager}' on KPI '{kpi}' showing partner concentration.
        Analyze how concentrated performance is (few vs many partners contributing).
        Write 3–4 crisp insights about:
        1. Level of concentration (high/medium/low),
        2. Implications on business dependency,
        3. Partner development or risk recommendations.

        Data:
        {text}
        """
        return self.gemini.generate_insight(prompt)

# ================= LEADERS & LAGGERS =================
class LeadersLaggersInsightGenerator(InsightGenerator):
    """Generates leaders and laggers insights"""
    
    def generate_manager_leaders_laggers(self, df: pd.DataFrame, kpi: str, hierarchy_level: str) -> Dict:
        """Generate leaders/laggers insights for all managers at given hierarchy level"""
        required_cols = [hierarchy_level, self.config.partner_column, kpi]
        if not self.validate_dataframe(df, required_cols):
            self.logger.warning(f"Missing required columns for leaders/laggers analysis: {required_cols}")
            return {}
            
        manager_insights = {}
        top_managers = self.get_top_managers(df, kpi, hierarchy_level)
        
        if not top_managers:
            self.logger.warning(f"No managers found for {hierarchy_level} level with KPI {kpi}")
            return {}
        
        for manager in top_managers:
            try:
                manager_data = df[df[hierarchy_level] == manager]
                if manager_data.empty:
                    continue
                    
                insight_text = self._analyze_leaders_laggers(manager, kpi, hierarchy_level, manager_data)
                
                # Generate LLM insights
                llm_insight = self._generate_leaders_laggers_llm_insight(
                    manager, kpi, hierarchy_level, insight_text
                )
                
                manager_insights[manager] = insight_text + "\n\nLLM Insights:\n" + llm_insight
                
            except Exception as e:
                self.logger.error(f"Error processing manager {manager} for leaders/laggers: {e}")
                continue
            
        return manager_insights
    
    def _analyze_leaders_laggers(self, manager: str, kpi: str, level: str, df: pd.DataFrame) -> str:
        """Analyze leaders and laggers for a manager"""
        try:
            # Filter out partners with no name
            df_clean = df[df[self.config.partner_column].notna()]
            
            if df_clean.empty:
                return f"{level}: {manager}\nKPI: {kpi}\n\nNo partner data available."
            
            # Sort partners by performance
            perf = (
                df_clean.groupby(self.config.partner_column)[kpi]
                .sum()
                .sort_values(ascending=False)
                .reset_index()
            )

            if perf.empty:
                return f"{level}: {manager}\nKPI: {kpi}\n\nNo partner data available."

            total_perf = perf[kpi].sum()
            if total_perf == 0:
                return f"{level}: {manager}\nKPI: {kpi}\n\nNo performance data available."

            # Leaders (Top N)
            leaders = perf.head(self.config.top_partners).copy()
            leaders['% Share'] = round(leaders[kpi] / total_perf * 100, 2)

            # Laggers (Bottom N)
            laggers = perf.tail(self.config.top_partners).copy()
            laggers['% Share'] = round(laggers[kpi] / total_perf * 100, 2)

            # Performance summary
            leaders_contrib = leaders['% Share'].sum()
            laggers_contrib = laggers['% Share'].sum()

            text = f"{level}: {manager}\nKPI: {kpi}\n\n"
            text += f"Total Partners under {manager}: {len(perf)}\n"
            text += f"Top {self.config.top_partners} Leaders contribute: {leaders_contrib:.2f}% of total {kpi}\n"
            text += f"Bottom {self.config.top_partners} Laggers contribute: {laggers_contrib:.2f}% of total {kpi}\n\n"

            text += "Top Performing (Leader) Partners:\n"
            for _, row in leaders.iterrows():
                partner_name = row[self.config.partner_column]
                text += f" - {partner_name}: {row[kpi]:,.2f} ({row['% Share']}%)\n"

            text += "\nLow Performing (Lagger) Partners:\n"
            for _, row in laggers.iterrows():
                partner_name = row[self.config.partner_column]
                text += f" - {partner_name}: {row[kpi]:,.2f} ({row['% Share']}%)\n"

            return text
            
        except Exception as e:
            self.logger.error(f"Error analyzing leaders/laggers for {manager}: {e}")
            return f"{level}: {manager}\nKPI: {kpi}\n\nError analyzing data: {e}"
    
    def _generate_leaders_laggers_llm_insight(self, manager: str, kpi: str, level: str, text: str) -> str:
        """Generate LLM insights for leaders/laggers analysis"""
        prompt = f"""
        You are a financial performance analyst.
        Below is data for {level} '{manager}' on KPI '{kpi}' showing top and bottom performing partners.

        Write a professional 3–4 bullet point insight covering:
        1. Performance dependency (e.g., dominated by few top partners or evenly spread),
        2. Impact of leaders on total performance,
        3. Weak link from laggers and improvement recommendations,
        4. Any early warning or strategic focus points.

        Data:
        {text}
        """
        return self.gemini.generate_insight(prompt)

# ================= DRASTIC CHANGES =================
class DrasticChangeInsightGenerator(InsightGenerator):
    """Generates insights for drastic performance changes"""
    
    def detect_drastic_changes(self, df: pd.DataFrame) -> pd.DataFrame:
        """Detect YoY KPI changes above threshold"""
        required_cols = [self.config.partner_column, self.config.year_column] + self.config.kpi_list
        if not self.validate_dataframe(df, required_cols):
            self.logger.warning(f"Missing required columns for drastic changes: {required_cols}")
            return pd.DataFrame()
            
        try:
            df_clean = df[df[self.config.partner_column].notna() & df[self.config.year_column].notna()]
            df_sorted = df_clean.sort_values([self.config.partner_column, self.config.year_column])
            change_records = []

            for partner, group in df_sorted.groupby([self.config.partner_column]):
                group = group.sort_values(self.config.year_column)
                for i in range(1, len(group)):
                    prev, curr = group.iloc[i - 1], group.iloc[i]

                    for kpi in self.config.kpi_list:
                        prev_val, curr_val = prev[kpi], curr[kpi]
                        if pd.isna(prev_val) or pd.isna(curr_val) or prev_val == 0:
                            continue
                        
                        pct_change = ((curr_val - prev_val) / abs(prev_val)) * 100

                        if abs(pct_change) >= self.config.change_threshold:
                            change_records.append({
                                'Partner Name': partner,
                                'KPI': kpi,
                                'Year From': prev[self.config.year_column],
                                'Year To': curr[self.config.year_column],
                                'Previous Value': round(prev_val, 2),
                                'Current Value': round(curr_val, 2),
                                '% Change': round(pct_change, 2)
                            })

            return pd.DataFrame(change_records)
            
        except Exception as e:
            self.logger.error(f"Error detecting drastic changes: {e}")
            return pd.DataFrame()
    
    def generate_change_insights(self, df: pd.DataFrame) -> Dict:
        """Generate insights for drastic changes"""
        change_df = self.detect_drastic_changes(df)
        
        if change_df.empty:
            return {"All": "No significant performance changes detected above threshold."}

        try:
            # Get top partners with most changes
            top_partners = (
                change_df.groupby('Partner Name')['% Change']
                .apply(lambda x: x.abs().mean())
                .sort_values(ascending=False)
                .head(self.config.top_partners)
                .index
            )

            partner_insights = {}
            for partner in top_partners:
                partner_data = change_df[change_df['Partner Name'] == partner]
                insight_text = self._generate_partner_change_insight(partner, partner_data)
                partner_insights[partner] = insight_text
                
            return partner_insights
            
        except Exception as e:
            self.logger.error(f"Error generating change insights: {e}")
            return {"Error": f"Failed to generate change insights: {e}"}
    
    def _generate_partner_change_insight(self, partner: str, partner_data: pd.DataFrame) -> str:
        """Generate change insights for a specific partner"""
        text = f"\n Partner: {partner}\n"

        try:
            prompt = f"""
You are a senior financial analyst.
Analyze the KPI performance changes for partner '{partner}'.

Below is year-over-year data showing KPI shifts beyond ±{self.config.change_threshold}%:
{partner_data}

(3–4 bullet points) that:
1. Identifies which KPIs show the largest positive or negative shifts, with actual % changes.
2. Interprets what these changes reveal about the partner's business direction.
3. Explains the potential impact on overall business performance.
4. Provides 1–2 quantified, strategic recommendations.

Tone: Data-driven, concise, and suitable for a sales performance dashboard.
Use ↑ and ↓ symbols for directionality.
Avoid generic phrasing; use the numbers provided.
"""
            llm_insight = self.gemini.generate_insight(prompt, temperature=0.6)
            text += llm_insight + "\n" + ("-" * 100) + "\n"

            return text
            
        except Exception as e:
            self.logger.error(f"Error generating change insight for partner {partner}: {e}")
            return f"\n Partner: {partner}\nError generating insights: {e}"

# ================= FOCUS AREAS =================
class FocusAreaInsightGenerator(InsightGenerator):
    """Generates focus area insights"""
    
    def identify_focus_areas(self, df: pd.DataFrame) -> pd.DataFrame:
        """Identify performance gaps vs targets or averages"""
        required_cols = [self.config.partner_column] + self.config.kpi_list
        if not self.validate_dataframe(df, required_cols):
            self.logger.warning(f"Missing required columns for focus areas: {required_cols}")
            return pd.DataFrame()
            
        try:
            focus_records = []
            df_clean = df[df[self.config.partner_column].notna()]
            kpi_avg = df_clean[self.config.kpi_list].mean()

            for partner, group in df_clean.groupby([self.config.partner_column]):
                partner_row = group.iloc[-1]  # Latest data
                gaps = {}

                for kpi in self.config.kpi_list:
                    # Look for target columns
                    possible_targets = [col for col in df.columns if 'Target' in col and kpi in col]
                    target_value = partner_row[possible_targets[0]] if possible_targets else kpi_avg[kpi]

                    if target_value and not pd.isna(target_value) and target_value != 0:
                        gap_pct = ((target_value - partner_row[kpi]) / target_value) * 100
                    else:
                        gap_pct = 0

                    gaps[kpi] = round(gap_pct, 2)

                max_gap = max(gaps.values()) if gaps else 0
                sorted_gaps = sorted(gaps.items(), key=lambda x: x[1], reverse=True)

                focus_records.append({
                    'Partner Name': partner,
                    'Focus KPIs': sorted_gaps,
                    'Max Gap': max_gap
                })

            return pd.DataFrame(focus_records)
            
        except Exception as e:
            self.logger.error(f"Error identifying focus areas: {e}")
            return pd.DataFrame()
    
    def generate_focus_insights(self, df: pd.DataFrame) -> Dict:
        """Generate focus area insights"""
        focus_df = self.identify_focus_areas(df)
        
        if focus_df.empty:
            return {"All": "No focus areas identified."}

        try:
            # Get top partners with largest gaps
            top_partners = (
                focus_df.sort_values('Max Gap', ascending=False)
                .head(self.config.top_partners)
            )

            partner_insights = {}
            for _, row in top_partners.iterrows():
                partner = row['Partner Name']
                insight_text = self._generate_partner_focus_insight(partner, row['Focus KPIs'])
                partner_insights[partner] = insight_text
                
            return partner_insights
            
        except Exception as e:
            self.logger.error(f"Error generating focus insights: {e}")
            return {"Error": f"Failed to generate focus insights: {e}"}
    
    def _generate_partner_focus_insight(self, partner: str, focus_kpis: List) -> str:
        """Generate focus insights for a specific partner"""
        text = f"\n🎯 Partner: {partner}\n"

        try:
            prompt = f"""
You are a senior business analyst.
Analyze the performance gaps for partner '{partner}'.

Below are the KPIs and their % gaps (higher = more underperformance):
{focus_kpis}

Your task:
- Focus only on quantitative insights — do not generate generic text.
- Use the % gaps to suggest realistic improvement targets (X%) based on the data.
- Estimate how much total performance (Y%) could improve if this partner closes these gaps partially or fully.

Generate exactly 3–4 concise insights:
1. Identify the top 2–3 KPIs with highest % gaps.
2. Suggest realistic % improvement targets (X%) and expected performance gains.
3. Estimate overall improvement (Y%) if achieved.
4. End with one actionable recommendation.
"""
            llm_insight = self.gemini.generate_insight(prompt, temperature=0.7)
            text += llm_insight + "\n" + ("-" * 100) + "\n"

            return text
            
        except Exception as e:
            self.logger.error(f"Error generating focus insight for partner {partner}: {e}")
            return f"\n Partner: {partner}\nError generating insights: {e}"
            
# ================= MAIN INSIGHT ORCHESTRATOR =================
class InsightOrchestrator:
    """Orchestrates all insight generation across hierarchy levels"""
    
    def __init__(self, gemini_api_key: str, config: InsightConfig = None):
        self.config = config or InsightConfig()
        self.gemini_service = GeminiService(gemini_api_key)
        
        # Initialize all generators
        self.concentration_gen = ConcentrationInsightGenerator(self.gemini_service, self.config)
        self.leaders_laggers_gen = LeadersLaggersInsightGenerator(self.gemini_service, self.config)
        self.drastic_change_gen = DrasticChangeInsightGenerator(self.gemini_service, self.config)
        self.focus_area_gen = FocusAreaInsightGenerator(self.gemini_service, self.config)
        
        self.logger = logging.getLogger(__name__)
    
    def validate_data_requirements(self, df: pd.DataFrame) -> bool:
        """Validate that all required columns exist in the dataframe"""
        required_columns = [self.config.partner_column, self.config.year_column] + self.config.kpi_list
        missing_columns = [col for col in required_columns if col not in df.columns]
        
        if missing_columns:
            self.logger.error(f"Missing required columns: {missing_columns}")
            self.logger.info(f"Available columns: {list(df.columns)}")
            return False
            
        return True
    
    def generate_all_insights(self, df: pd.DataFrame) -> Dict:
        """Generate all types of insights across all hierarchy levels"""
        if not self.validate_data_requirements(df):
            return {"error": "Data validation failed - check required columns"}
            
        all_insights = {}
        
        # Process hierarchy levels in the order specified (ZM first)
        for hierarchy_level in self.config.hierarchy_levels:
            if hierarchy_level not in df.columns:
                self.logger.warning(f"Hierarchy level {hierarchy_level} not found in dataframe. Available: {list(df.columns)}")
                continue
                
            level_insights = {}
            
            # Generate insights for each KPI at this hierarchy level
            kpi_insights = {}
            for kpi in self.config.kpi_list:
                if kpi not in df.columns:
                    self.logger.warning(f"KPI {kpi} not found in dataframe")
                    continue
                    
                # 1. Partner Concentration
                try:
                    concentration_insights = self.concentration_gen.generate_manager_concentration(
                        df, kpi, hierarchy_level
                    )
                    if concentration_insights:
                        kpi_insights[kpi] = {
                            'concentration': concentration_insights
                        }
                except Exception as e:
                    self.logger.error(f"Concentration insight error for {kpi}: {e}")
                
                # 2. Leaders & Laggers
                try:
                    leaders_laggers_insights = self.leaders_laggers_gen.generate_manager_leaders_laggers(
                        df, kpi, hierarchy_level
                    )
                    if leaders_laggers_insights:
                        if kpi not in kpi_insights:
                            kpi_insights[kpi] = {}
                        kpi_insights[kpi]['leaders_laggers'] = leaders_laggers_insights
                except Exception as e:
                    self.logger.error(f"Leaders/Laggers insight error for {kpi}: {e}")
            
            if kpi_insights:
                level_insights['kpi_insights'] = kpi_insights
            
            # 3. Drastic Changes - Generate for ALL hierarchy levels
            try:
                drastic_changes = self.drastic_change_gen.generate_change_insights(df)
                # REMOVED THE FILTERING CONDITION - always include if available
                if drastic_changes:
                    level_insights['drastic_changes'] = drastic_changes
                    self.logger.info(f"Generated drastic changes for {hierarchy_level}")
            except Exception as e:
                self.logger.error(f"Drastic change insight error for {hierarchy_level}: {e}")
            
            # 4. Focus Areas - Generate for ALL hierarchy levels  
            try:
                focus_areas = self.focus_area_gen.generate_focus_insights(df)
                # REMOVED THE FILTERING CONDITION - always include if available
                if focus_areas:
                    level_insights['focus_areas'] = focus_areas
                    self.logger.info(f"Generated focus areas for {hierarchy_level}")
            except Exception as e:
                self.logger.error(f"Focus area insight error for {hierarchy_level}: {e}")
            
            if level_insights:
                all_insights[hierarchy_level] = level_insights
            
        return all_insights
    
    def display_insights(self, insights: Dict):
        """Display insights in formatted output"""
        if 'error' in insights:
            print(f"ERROR: {insights['error']}")
            return
            
        # Display insights in the order of hierarchy levels (ZM first)
        for hierarchy_level in self.config.hierarchy_levels:
            if hierarchy_level not in insights:
                continue
                
            level_insights = insights[hierarchy_level]
            print(f"\n{'='*80}")
            print(f"INSIGHTS FOR {hierarchy_level} LEVEL")
            print(f"{'='*80}")
            
            # Display KPI-based insights
            if 'kpi_insights' in level_insights:
                for kpi, insight_types in level_insights['kpi_insights'].items():
                    print(f"\n--- {kpi} ---")
                    for insight_type, manager_insights in insight_types.items():
                        print(f"\n{insight_type.upper()} INSIGHTS:")
                        if isinstance(manager_insights, dict):
                            for manager, insight in manager_insights.items():
                                print(f"\n{manager}:\n{insight}")
                                print("-" * 60)
                        else:
                            print(f"\n{manager_insights}")
            
            # Display drastic changes
            if 'drastic_changes' in level_insights:
                print(f"\nDRASTIC CHANGES INSIGHTS:")
                changes_data = level_insights['drastic_changes']
                if isinstance(changes_data, dict):
                    for entity, insight in changes_data.items():
                        print(f"\n{entity}:\n{insight}")
                        print("-" * 60)
                else:
                    print(f"\n{changes_data}")
            
            # Display focus areas
            if 'focus_areas' in level_insights:
                print(f"\nFOCUS AREAS INSIGHTS:")
                focus_data = level_insights['focus_areas']
                if isinstance(focus_data, dict):
                    for entity, insight in focus_data.items():
                        print(f"\n{entity}:\n{insight}")
                        print("-" * 60)
                else:
                    print(f"\n{focus_data}")

# ================= DEBUGGING FUNCTION =================
def debug_insight_generation(df):
    """Debug function to check why insights aren't generating"""
    print("🔍 DEBUGGING INSIGHT GENERATION")
    print(f"DataFrame shape: {df.shape}")
    print(f"Available columns: {list(df.columns)}")
    
    # Check for required columns
    required_cols = ['Partner Name_x', 'FY_Year_x'] + ['Equity Sales', 'SIP Sales_Achievement', 'Net Sales through MARS', 'Investment Net Sales Achievement']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        print(f"❌ Missing columns: {missing_cols}")
    else:
        print("✅ All required columns present")
    
    # Check hierarchy levels
    hierarchy_levels = ['ZM', 'BM', 'Relationship Handler']
    available_hierarchy = [level for level in hierarchy_levels if level in df.columns]
    print(f"Available hierarchy levels: {available_hierarchy}")
    
    # Check for year data
    if 'FY_Year_x' in df.columns:
        print(f"Year values: {df['FY_Year_x'].unique()}")
    
    # Check for partner data
    if 'Partner Name_x' in df.columns:
        print(f"Number of unique partners: {df['Partner Name_x'].nunique()}")
        print(f"Sample partners: {df['Partner Name_x'].dropna().unique()[:5]}")
        
    # Check for KPI data
    kpis = ['Equity Sales', 'SIP Sales_Achievement', 'Net Sales through MARS', 'Investment Net Sales Achievement']
    for kpi in kpis:
        if kpi in df.columns:
            non_null_count = df[kpi].notna().sum()
            print(f"KPI '{kpi}': {non_null_count} non-null values, range: {df[kpi].min():.2f} to {df[kpi].max():.2f}")

# ================= UPDATED MAIN FUNCTION =================
def main():
    """Updated main function with debugging"""
    
    # First, debug the data
    debug_insight_generation(merged_with_hierarchyy)
    
    # Configuration
    insight_config = InsightConfig(
        kpi_list=[
            'Equity Sales',
            'SIP Sales_Achievement',
            'Net Sales through MARS',
            'Investment Net Sales Achievement'
        ],
        hierarchy_levels=['ZM', 'BM', 'Relationship Handler'],
        partner_column='Partner Name_x',
        year_column='FY_Year_x',
        top_managers=3,
        top_partners=2,
        change_threshold=50.0
    )
    
    # Initialize orchestrator
    orchestrator = InsightOrchestrator(
        gemini_api_key='AIzaSyADwv71iu5J5m9TK0oV2lDgZxQrTLkg1K0',
        config=insight_config
    )
    
    # Generate all insights
    print("\n GENERATING INSIGHTS...")
    all_insights = orchestrator.generate_all_insights(merged_with_hierarchyy)
    
    # Display insights
    print("\n DISPLAYING GENERATED INSIGHTS:")
    orchestrator.display_insights(all_insights)
    
    return all_insights

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')  
    insights = main()

🔍 DEBUGGING INSIGHT GENERATION
DataFrame shape: (81001, 142)
Available columns: ['Sr No.', 'Partner Code', 'Partner Name_x', 'Center_x', 'Category_x', 'Relationship Handler', 'Investment Net Sales Target', 'Investment Net Sales Achievement', 'Investment Net Sales % Achievement', 'Equity Sales', 'MIP Sales', 'Gold Sales', 'Sales in Physical Assets', 'Sales in Direct Equity', 'FD + Bond (Primary Market) Sales', 'Secondary Market Bond Sales', 'Net Sales Through Realty', 'Net NJ PMS Sales', 'Net Non-NJ PMS Sales', 'Net Sales through MARS', 'SIP Sales Target', 'SIP Sales Achievement', 'SIP Sales % Achievement', 'Fresh Gross SIP Sales', 'SIP Closure / Termination', 'FY_Year_x', 'Sr No', 'Broker Code', 'Doer Name_x', 'Doer Type_x', 'Center_y', 'Equity Net Sales_Target', 'Equity Net Sales_Achievement', 'Equity Net Sales_% Achievement', 'Insurance_Target', 'Insurance_Achievement', 'Insurance_% Achievement', 'SIP Sales_Target', 'SIP Sales_Achievement', 'SIP Sales_% Achievement', 'Client Acquisit

### sql- store insights in db

In [319]:
import pandas as pd
from sqlalchemy import create_engine, text
from typing import List, Dict
import logging

class InsightDatabaseSaver:
    """Pure database saver - NO API KEY NEEDED"""
    
    def __init__(self, connection_string: str = "mysql+pymysql://root:1234@localhost:3306/Insights_nj"):
        self.engine = create_engine(connection_string)
        self.logger = logging.getLogger(__name__)
        self._ensure_table_exists()
    
    def _ensure_table_exists(self):
        """Ensure the AllInsights table exists"""
        create_table_query = """
        CREATE TABLE IF NOT EXISTS AllInsights (
            ZM VARCHAR(255),
            BM VARCHAR(255),
            Insight_Type VARCHAR(100),
            Insight TEXT,
            INDEX idx_zm (ZM),
            INDEX idx_bm (BM),
            INDEX idx_insight_type (Insight_Type)
        )
        """
        try:
            with self.engine.connect() as conn:
                conn.execute(text("DROP TABLE IF EXISTS AllInsights"))
                conn.execute(text(create_table_query))
                conn.commit()
            self.logger.info("AllInsights table created successfully")
        except Exception as e:
            self.logger.error(f"Error creating table: {e}")
            raise
    
    def save_all_insights(self, all_insights: Dict, hierarchy_df: pd.DataFrame):
        """Save all insights from the InsightOrchestrator output"""
        print(f"🔍 DEBUG: Starting save_all_insights")
        print(f"🔍 DEBUG: all_insights keys: {list(all_insights.keys())}")
        
        total_saved = 0
        
        # Process each hierarchy level
        for hierarchy_level, level_insights in all_insights.items():
            print(f"\n🔍 DEBUG: Processing {hierarchy_level} level")
            
            # 1. Save KPI-based insights (Concentration & Leaders/Laggers)
            if 'kpi_insights' in level_insights:
                saved = self._save_kpi_insights(level_insights['kpi_insights'], hierarchy_level, hierarchy_df)
                total_saved += saved
            
            # 2. Save Drastic Changes
            if 'drastic_changes' in level_insights:
                saved = self._save_drastic_changes(level_insights['drastic_changes'], hierarchy_level)
                total_saved += saved
            
            # 3. Save Focus Areas
            if 'focus_areas' in level_insights:
                saved = self._save_focus_areas(level_insights['focus_areas'], hierarchy_level)
                total_saved += saved
        
        print(f"\n✅ TOTAL: Saved {total_saved} insights to database")
        return total_saved
    
    def _save_kpi_insights(self, kpi_insights: Dict, hierarchy_level: str, hierarchy_df: pd.DataFrame) -> int:
        """Save KPI-based insights (Concentration and Leaders/Laggers)"""
        records = []
        
        for kpi, insight_types in kpi_insights.items():
            print(f"🔍 DEBUG: Processing KPI: {kpi}")
            
            # Process Concentration insights
            if 'concentration' in insight_types:
                concentration_data = insight_types['concentration']
                print(f"🔍 DEBUG: Found {len(concentration_data)} concentration insights")
                
                for manager, insight_text in concentration_data.items():
                    bm_info = self._get_bm_for_manager(manager, hierarchy_level, hierarchy_df)
                    records.append({
                        "ZM": manager if hierarchy_level == 'ZM' else bm_info['zm'],
                        "BM": manager if hierarchy_level == 'BM' else bm_info['bm'],
                        "Insight_Type": "Partner Concentration",
                        "Insight": insight_text
                    })
            
            # Process Leaders & Laggers insights
            if 'leaders_laggers' in insight_types:
                leaders_data = insight_types['leaders_laggers']
                print(f"🔍 DEBUG: Found {len(leaders_data)} leaders/laggers insights")
                
                for manager, insight_text in leaders_data.items():
                    bm_info = self._get_bm_for_manager(manager, hierarchy_level, hierarchy_df)
                    records.append({
                        "ZM": manager if hierarchy_level == 'ZM' else bm_info['zm'],
                        "BM": manager if hierarchy_level == 'BM' else bm_info['bm'],
                        "Insight_Type": "Leaders & Laggers",
                        "Insight": insight_text
                    })
        
        return self._save_records(records, "KPI insights")
    
    def _save_drastic_changes(self, drastic_changes: Dict, hierarchy_level: str) -> int:
        """Save drastic change insights"""
        records = []
        
        print(f"🔍 DEBUG: Drastic changes type: {type(drastic_changes)}, content: {drastic_changes}")
        
        if isinstance(drastic_changes, dict):
            # Handle the case where drastic_changes is a dict of partner insights
            for partner, insight_text in drastic_changes.items():
                if partner not in ['All', 'Error']:
                    records.append({
                        "ZM": None,  # Partner-level insights don't have specific ZM/BM
                        "BM": None,
                        "Insight_Type": "Drastic Change Analysis",
                        "Insight": insight_text
                    })
        
        return self._save_records(records, "drastic changes")
    
    def _save_focus_areas(self, focus_areas: Dict, hierarchy_level: str) -> int:
        """Save focus area insights"""
        records = []
        
        print(f"🔍 DEBUG: Focus areas type: {type(focus_areas)}, content: {focus_areas}")
        
        if isinstance(focus_areas, dict):
            # Handle the case where focus_areas is a dict of partner insights
            for partner, insight_text in focus_areas.items():
                if partner not in ['All', 'Error']:
                    records.append({
                        "ZM": None,  # Partner-level insights don't have specific ZM/BM
                        "BM": None,
                        "Insight_Type": "Area to Focus",
                        "Insight": insight_text
                    })
        
        return self._save_records(records, "focus areas")
    
    def _get_bm_for_manager(self, manager: str, hierarchy_level: str, hierarchy_df: pd.DataFrame) -> Dict:
        """Get BM information for a manager based on hierarchy level"""
        if hierarchy_level == 'ZM':
            # For ZM, find their BMs
            bm_list = hierarchy_df.loc[hierarchy_df["ZM"] == manager, "BM"].unique().tolist()
            return {
                'zm': manager,
                'bm': bm_list[0] if bm_list else "Unknown BM"
            }
        elif hierarchy_level == 'BM':
            # For BM, find their ZM
            zm_list = hierarchy_df.loc[hierarchy_df["BM"] == manager, "ZM"].unique().tolist()
            return {
                'zm': zm_list[0] if zm_list else "Unknown ZM",
                'bm': manager
            }
        else:  # Relationship Handler
            return {
                'zm': "Unknown ZM",
                'bm': "Unknown BM"
            }
    
    def _save_records(self, records: List[Dict], insight_type: str) -> int:
        """Save records to database and return count of saved records"""
        print(f"🔍 DEBUG: _save_records called with {len(records)} records for {insight_type}")
        
        if not records:
            print(f"❌ WARNING: No records to save for {insight_type}")
            return 0
        
        df_to_save = pd.DataFrame(records)
        print(f"🔍 DEBUG: DataFrame shape: {df_to_save.shape}")
        
        # Ensure correct column order and handle None values
        df_to_save = df_to_save[["ZM", "BM", "Insight_Type", "Insight"]]
        df_to_save = df_to_save.fillna('')  # Replace None with empty string

        try:
            df_to_save.to_sql(
                name="AllInsights",
                con=self.engine,
                if_exists="append",
                index=False
            )
            print(f"✅ SUCCESS: Saved {len(df_to_save)} {insight_type} to MySQL")
            
            # Show sample of what was saved
            print(f"📊 Sample of saved {insight_type}:")
            sample = df_to_save.head(3)[["ZM", "BM", "Insight_Type"]]
            for _, row in sample.iterrows():
                print(f"   - ZM: {row['ZM']}, BM: {row['BM']}, Type: {row['Insight_Type']}")
            
            return len(df_to_save)
            
        except Exception as e:
            print(f" ERROR saving {insight_type} to MySQL: {e}")
            return 0

    def check_existing_data(self):
        """Check what's already in the database"""
        try:
            with self.engine.connect() as conn:
                result = conn.execute(text("SELECT COUNT(*) as count FROM AllInsights"))
                count = result.fetchone()[0]
                print(f"📊 Currently {count} records in AllInsights table")
                
                # Show insight types distribution
                result = conn.execute(text("SELECT Insight_Type, COUNT(*) FROM AllInsights GROUP BY Insight_Type"))
                types = result.fetchall()
                print("📊 Insight types in database:")
                for insight_type, count in types:
                    print(f"   - {insight_type}: {count} records")
                    
                # Show a few sample records
                result = conn.execute(text("SELECT ZM, BM, Insight_Type FROM AllInsights LIMIT 5"))
                samples = result.fetchall()
                print("📊 Sample records:")
                for zm, bm, insight_type in samples:
                    print(f"   - ZM: {zm}, BM: {bm}, Type: {insight_type}")
                    
        except Exception as e:
            print(f"❌ Error checking database: {e}")

# ================= SIMPLIFIED USAGE =================
def save_insights_to_database():
    """Save all insights to database - Simple usage"""
    
    # Initialize the database saver
    db_saver = InsightDatabaseSaver()
    
    # Check what's already in the database
    print("📊 CHECKING EXISTING DATA...")
    db_saver.check_existing_data()
    
    print("\n" + "="*60)
    print("SAVING INSIGHTS TO DATABASE")
    print("="*60)
    
    try:
        # Save all insights from your InsightOrchestrator output
        # Replace 'all_insights' with your actual variable from InsightOrchestrator.generate_all_insights()
        # Replace 'merged_with_hierarchyy' with your hierarchy dataframe
        
        total_saved = db_saver.save_all_insights(
            all_insights=all_insights,  # Your InsightOrchestrator output
            hierarchy_df=merged_with_hierarchyy  # Your hierarchy dataframe
        )
        
        print(f"\n FINAL RESULT: {total_saved} insights saved to database")
        
    except Exception as e:
        print(f" ERROR: {e}")
    
    # Check final state
    print("\n" + "="*60)
    print("FINAL DATABASE STATE")
    print("="*60)
    db_saver.check_existing_data()

# ================= ALTERNATIVE: INDIVIDUAL SAVING =================
def save_insights_individual():
    """Alternative: Save insights individually if you have separate variables"""
    
    db_saver = InsightDatabaseSaver()
    
    # If you have separate insight variables, you can use this approach:
    # This assumes you have variables like:
    # - zm_concentration_insights
    # - bm_concentration_insights  
    # - zm_leaders_insights
    # - bm_leaders_insights
    # - drastic_change_insights
    # - focus_area_insights
    
    print("Saving insights individually...")
    
    # Example structure for individual saving:
    combined_insights = {
        'ZM': {
            'kpi_insights': {
                'Equity Sales': {
                    'concentration': zm_concentration_insights,
                    'leaders_laggers': zm_leaders_insights
                }
            },
            'drastic_changes': drastic_change_insights,
            'focus_areas': focus_area_insights
        }
    }
    
    db_saver.save_all_insights(combined_insights, merged_with_hierarchyy)

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    
    # Use the main function
    save_insights_to_database()

2025-11-11 17:52:15,630 - __main__ - INFO - AllInsights table created successfully
📊 CHECKING EXISTING DATA...
📊 Currently 0 records in AllInsights table
📊 Insight types in database:
📊 Sample records:

SAVING INSIGHTS TO DATABASE
 ERROR: name 'all_insights' is not defined

FINAL DATABASE STATE
📊 Currently 0 records in AllInsights table
📊 Insight types in database:
📊 Sample records:


In [152]:
fy23_BN = pd.read_excel("FY23 - YTD - Partner Score Card - Business Numbers.xls",)
fy24_BN = pd.read_excel("FY24 - YTD - Partner Score Card - Business Numbers.xls")
fy25_BN = pd.read_excel("FY25 - YTD - Partner Score Card - Business Numbers.xls")

In [153]:
# Make first row the header
fy23_BN.columns = fy23_BN.iloc[0]  
fy23_BN = fy23_BN[1:]               
fy23_BN = fy23_BN.reset_index(drop=True) 

In [154]:
# Make first row the header
fy24_BN.columns = fy24_BN.iloc[0]   # assign first row as header
fy24_BN = fy24_BN[1:]               # drop the first row
fy24_BN = fy24_BN.reset_index(drop=True) 

In [174]:
# Make first row the header
fy25_BN.columns = fy25_BN.iloc[0]   # assign first row as header
fy25_BN = fy25_BN[1:]               # drop the first row
fy25_BN = fy25_BN.reset_index(drop=True) 

In [156]:
fy23_BN['FY_Year'] = '2023'
fy24_BN['FY_Year'] = '2024'
fy25_BN['FY_Year'] = '2025'

In [157]:
BN_total = pd.concat([fy23_BN,fy24_BN,fy25_BN], axis = 0)

In [178]:
BN_total.head(3)

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023
1,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023
2,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023


In [221]:
fy23_SC = pd.read_excel("FY23 - YTD - Partner Score Card - Scores.xls")
fy24_SC = pd.read_excel("FY24 - YTD - Partner Score Card - Scores.xls")
fy25_SC = pd.read_excel("FY25 - YTD - Partner Score Card - Scores.xls")

In [160]:
# Make first row the header
fy23_SC.columns = fy23_SC.iloc[0]   # assign first row as header
fy23_SC = fy23_SC[1:]               # drop the first row
fy23_SC = fy23_SC.reset_index(drop=True) 

In [162]:
import pandas as pd

# If your columns are like a MultiIndex (from reading an Excel with merged headers)
# First, fill forward the top-level header
cols = fy23_SC.columns.to_list()
new_cols = []
last_val = None
for c in cols:
    if pd.notna(c):
        last_val = c
        new_cols.append(c)
    else:
        new_cols.append(last_val)
fy23_SC.columns = new_cols

# Now handle the 2nd row if it contains sub-columns like Target/Achievement/% Achievement
# Suppose the 0th row in your df contains sub-column info
sub_cols = fy23_SC.iloc[0].fillna('')
fy23_SC = fy23_SC[1:]  # Remove the 0th row used for sub-headers

# Combine top-level + sub-column names
fy23_SC.columns = [f"{top}_{sub}" if sub != '' else top for top, sub in zip(fy23_SC.columns, sub_cols)]

# Reset index
fy23_SC.reset_index(drop=True, inplace=True)

In [163]:
# Make first row the header
fy24_SC.columns = fy24_SC.iloc[0]   # assign first row as header
fy24_SC = fy24_SC[1:]               # drop the first row
fy24_SC = fy24_SC.reset_index(drop=True) 

In [164]:
import pandas as pd

# If your columns are like a MultiIndex (from reading an Excel with merged headers)
# First, fill forward the top-level header
cols = fy24_SC.columns.to_list()
new_cols = []
last_val = None
for c in cols:
    if pd.notna(c):
        last_val = c
        new_cols.append(c)
    else:
        new_cols.append(last_val)
fy24_SC.columns = new_cols

# Now handle the 2nd row if it contains sub-columns like Target/Achievement/% Achievement
# Suppose the 0th row in your df contains sub-column info
sub_cols = fy24_SC.iloc[0].fillna('')
fy24_SC = fy24_SC[1:]  # Remove the 0th row used for sub-headers

# Combine top-level + sub-column names
fy24_SC.columns = [f"{top}_{sub}" if sub != '' else top for top, sub in zip(fy24_SC.columns, sub_cols)]

# Reset index
fy24_SC.reset_index(drop=True, inplace=True)


In [165]:
fy24_SC.head()

Unnamed: 0,Sr No,Broker Code,Partner Name,Doer Name,Doer Type,Center,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement
0,1,33054,BIPLAB ROY,Santanu Deb,Fundz Express,AGARTALA,1080000.0,0.0,0.0,,,,27000.0,0.0,0.0,16.0,2.0,12.5,0.0,0.0,,0.0,0.0,2.5
1,2,35213,PRIYA DATTA,Santanu Deb,Fundz Express,AGARTALA,,,,10800.0,0.0,0.0,,,,,,,,,,,,0.0
2,3,88928,Santanu Deb,Santanu Deb,Fundz Express,AGARTALA,4752000.0,2593801.36,54.58,16200.0,0.0,0.0,97200.0,116494.38,119.85,48.0,31.0,64.58,10708.97,0.0,0.0,195991.35,72.59,70.7
3,4,33434,TAPAS RUDRA PAL,Santanu Deb,Fundz Express,AGARTALA,1080000.0,2520360.4,233.37,,,,27000.0,94683.46,350.68,16.0,58.0,362.5,451.8,0.0,0.0,59997.11,22.22,275.01
4,5,26185,AMIT GANGWAR,SAURABH AGRAWAL,UNIT MANAGER,AGRA,5443200.0,1074847.36,19.75,93600.0,0.0,0.0,133920.0,78146.07,58.35,48.0,30.0,62.5,886.06,0.0,0.0,67096.73,9.71,30.83


In [184]:
fy25_SC.columns = fy25_SC.iloc[0]   
fy25_SC = fy25_SC[1:]               
fy25_SC = fy25_SC.reset_index(drop=True) 

In [185]:
import pandas as pd
cols = fy25_SC.columns.to_list()
new_cols = []
last_val = None
for c in cols:
    if pd.notna(c):
        last_val = c
        new_cols.append(c)
    else:
        new_cols.append(last_val)
fy25_SC.columns = new_cols

sub_cols = fy25_SC.iloc[0].fillna('')
fy25_SC = fy25_SC[1:]  # Remove the 0th row used for sub-headers

fy25_SC.columns = [f"{top}_{sub}" if sub != '' else top for top, sub in zip(fy25_SC.columns, sub_cols)]

fy25_SC.reset_index(drop=True, inplace=True)


In [167]:
fy25_SC.head()

Unnamed: 0,Partner Scorecard Report For Month Mar 25_Sr No,Unnamed: 1_Broker Code,Unnamed: 2_Partner Name,Unnamed: 3_Doer Name,Unnamed: 4_Doer Type,Unnamed: 5_Center,Unnamed: 6_Equity Net Sales,Unnamed: 7,Unnamed: 8,Unnamed: 9_Insurance,Unnamed: 10,Unnamed: 11,Unnamed: 12_SIP Sales,Unnamed: 13,Unnamed: 14,Unnamed: 15_Client Acquisition,Unnamed: 16,Unnamed: 17,Unnamed: 18_LAS,Unnamed: 19,Unnamed: 20,Unnamed: 21_SIP to Net Sales Ratio,Unnamed: 22,Unnamed: 23_Total % Achievement
0,,,,,,,Target,Achievement,% Achievement,Target,Achievement,% Achievement,Target,Achievement,% Achievement,Target,Achievement,% Achievement,Target,Achievement,% Achievement,SIP Input Value,Ratio,
1,1.0,42342.0,ABHISHEK KUMAR,GUNJAN KUMAR ROY,UNIT MANAGER,AGRA,2721600,6748802.27,247.97,28000,92237.5,329.42,68040,453382.33,666.35,25.5,68,266.67,1331.31,464000,200,309484.81,68.23,395.32
2,2.0,42032.0,AJAJ AHMED,GUNJAN KUMAR ROY,UNIT MANAGER,AGRA,3175200,53890.98,1.7,16800,13777.96,82.01,79380,7124.71,8.98,29.75,18,60.5,177.56,0,0,11499.49,2.54,23.2
3,3.0,41167.0,AMAN GUPTA,GUNJAN KUMAR ROY,UNIT MANAGER,AGRA,3175200,964994.72,30.39,33600,0,0,79380,98245.11,123.77,29.75,2,6.72,219.92,0,0,65496.74,14.44,46.32
4,4.0,26185.0,AMIT GANGWAR,SAURABH AGRAWAL,UNIT MANAGER,AGRA,8709120,1649413.22,18.94,134400,0,0,190512,26098.76,13.7,51,14,27.45,3102.3,0,0,84495.9,11.64,12.02


In [170]:
fy23_SC.head(1)

Unnamed: 0,Sr No,Broker Code,Partner Name,Doer Name,Doer Type,Center,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement
0,1,23676,ALOKE CHATTERJEE,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0,4999.76,1.67,17.82


In [171]:
fy24_SC.head(1)

Unnamed: 0,Sr No,Broker Code,Partner Name,Doer Name,Doer Type,Center,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement
0,1,33054,BIPLAB ROY,Santanu Deb,Fundz Express,AGARTALA,1080000,0,0,,,,27000,0,0,16,2,12.5,0,0,,0,0,2.5


In [186]:
fy25_SC.head(1)

Unnamed: 0,Sr No,Broker Code,Partner Name,Doer Name,Doer Type,Center,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement
0,1,42342,ABHISHEK KUMAR,GUNJAN KUMAR ROY,UNIT MANAGER,AGRA,2721600,6748802.27,247.97,28000,92237.5,329.42,68040,453382.33,666.35,25.5,68,266.67,1331.31,464000,200,309484.81,68.23,395.32


In [191]:
fy23_SC['FY_Year'] = '2023'
fy24_SC['FY_Year'] = '2024'
fy25_SC['FY_Year'] = '2025'

In [192]:
SC_totals = pd.concat([fy23_SC,fy24_SC,fy25_SC], axis = 0)

In [193]:
SC_totals.head()

Unnamed: 0,Sr No,Broker Code,Partner Name,Doer Name,Doer Type,Center,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,FY_Year
0,1,23676,ALOKE CHATTERJEE,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,2023
1,2,20361,ARINDAM CHAKRAVARTI,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,2023
2,3,24695,CHINTU KUMAR SHAW,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,2023
3,4,24040,GOURAB PURKAIT,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,26998.7,1.8,,,,33750,10499.49,31.11,20,3.0,15.0,3.11,0,0.0,6999.66,2.33,14.59,2023
4,5,25776,RAJ KISHORE BARIK,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,600000,496499.92,82.75,,,,13500,1499.92,11.11,8,3.0,37.5,0.0,0,,0.0,0.0,40.82,2023


In [24]:
Partner_list = pd.read_excel("2025-03 - D Non-D Partner List.xls")

In [25]:
Partner_list.columns = Partner_list.iloc[0]   
Partner_list = Partner_list[1:]               
Partner_list = Partner_list.reset_index(drop=True) 

In [194]:
Partner_list.head()

Unnamed: 0,Sr.No.,Partner Code,Partner Name,Center,Enrollment Date,Partner Category,Age in NJ,No of Live Account,Partner Consent,Relationship Handler Name,Role,MARS Category,PMS Category,NFO Category,Latest SDC Status,SDC Year,Latest Sub-Category,Sub-Category From-To Date,Partner Product-Status
0,10601,1166,Mehul Rameshchandra Ankleshwaria,Bharuch,15/10/2009,D,184,2,,AMIT PRANCHVANI,BRANCH MANAGER,,,,Not Received,2023 - 2024,Normal,01/01/2025 - 31/03/2025,MUTUAL FUND-Terminated
1,8721,1303,Kapil Amrutbhai Patel,BARDOLI,15/10/2009,D,184,7,,NIKHIL SURESHCHANDRA GANDHI,Fundz Express,,,,Not Received,2023 - 2024,Normal,01/01/2025 - 31/03/2025,MUTUAL FUND-Terminated
2,2235,1316,Heena Mohan Khatri,Ahmedabad - Navrangpura,15/10/2009,D,184,0,,AKASH M. THAKKAR,UNIT MANAGER,,,,Not Received,2023 - 2024,Normal,01/01/2025 - 31/03/2025,MUTUAL FUND-Terminated
3,40140,1354,Bhavya T Doshi,Mumbai - Borivali,15/10/2009,D,184,43,,PURVI HEERAN MEHTA,BRANCH MANAGER,,,,Received,2023 - 2024,Normal,01/01/2025 - 31/03/2025,MUTUAL FUND-Terminated
4,8767,1446,Saurabh Dipak Thakore,BARDOLI,15/10/2009,D,184,22,,NIKHIL SURESHCHANDRA GANDHI,Fundz Express,,,,Not Received,2023 - 2024,Normal,01/01/2025 - 31/03/2025,MUTUAL FUND-Terminated


In [27]:
fy24_MIS = pd.read_excel("FY24 - Partner MIS Data.xlsx")

In [28]:
fy24_MIS.columns = fy24_MIS.iloc[0]
fy24_MIS = fy24_MIS.drop(fy24_MIS.index[0])
fy24_MIS = fy24_MIS.reset_index(drop=True)
#print(fy24_MIS.head())

In [29]:
fy25_MIS = pd.read_excel("FY25 - Partner MIS Data.xlsx")

In [30]:
fy25_MIS.columns = fy25_MIS.iloc[0]
fy25_MIS = fy25_MIS.drop(fy25_MIS.index[0])
fy25_MIS = fy25_MIS.reset_index(drop=True)
#print(fy25_mis.head())

In [32]:
fy24_MIS.head()

Unnamed: 0,ZM,SRM,RM,BM,Broker Code,Partner Name,Category,Doer Name,Doer Type,Center,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement,Status
0,KOUSHIK GHOSH,,KAMLESH KUMAR,,33054,BIPLAB ROY,NON D,Santanu Deb,Fundz Express,AGARTALA,3,3,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,0,,,,,,,,,0,,,,,,,0.0,,0.0,12.5,,2.5,MUTUAL FUND-Active
1,KOUSHIK GHOSH,,KAMLESH KUMAR,,88928,Santanu Deb,NON D,Santanu Deb,Fundz Express,AGARTALA,55,8,7090659.76,7090659.76,228989.7,1935230.657,1935230.666,95995.23,1999435,1050000,0,0,16,35,,,,,,,,,0,,,,,,,54.58,0.0,119.85,64.58,0.0,70.7,MUTUAL FUND-Active
2,KOUSHIK GHOSH,,KAMLESH KUMAR,,33434,TAPAS RUDRA PAL,NON D,Santanu Deb,Fundz Express,AGARTALA,3,3,2114389.27,2076409.88,108495.24,2082466.446,2045363.633,108495.24,0,0,0,0,30,27,,,,,,,,,0,,,,,,,233.37,,350.68,362.5,0.0,275.01,MUTUAL FUND-Active
3,PRASHANT ANANTRAI KAKKAD,BHAVESH B. JOSHI,ABHAY KUMAR VISHWAKARMA,AALEY NABI,26185,AMIT GANGWAR,NON D,SAURABH AGRAWAL,UNIT MANAGER,AGRA,13,13,885945.96,885945.96,67096.73,567009.9,567009.902,60096.98,0,0,0,0,18,23,,,,,,,,,0,,,,,,,19.75,0.0,58.35,62.5,0.0,30.83,MUTUAL FUND-Active
4,PRASHANT ANANTRAI KAKKAD,BHAVESH B. JOSHI,ABHAY KUMAR VISHWAKARMA,AALEY NABI,21754,ANJANA VARSHNEY,NON D,SAURABH AGRAWAL,UNIT MANAGER,AGRA,19,13,325872.43,325872.43,9999.5,189993.304,189993.304,-8999.55,0,0,0,0,0,2,,,,,,,,,0,,,,,,,6.98,,-9.8,4.17,0.0,-1.22,MUTUAL FUND-Active


In [33]:
fy24_MIS['FY_Year'] = '2024'
fy25_MIS['FY_Year'] = '2025'

In [34]:
fy24_MIS['FY_Year'] = '2024'
fy25_MIS['FY_Year'] = '2025'

MIS_total = pd.concat([fy24_MIS, fy25_MIS], axis=0)

In [35]:
MIS_total.tail(5)

Unnamed: 0,ZM,SRM,RM,BM,Broker Code,Partner Name,Category,Doer Name,Doer Type,Center,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement,Status,FY_Year,Total AUM FY 24-25 Q4 YTD,Equity AUM FY 24-25 Q4 YTD,LIVE SIP FY 24-25 Q4 YTD,Total Net Sales\nFY 24-25 Q4 YTD,Equity Net Sales\nFY 24-25 Q4 YTD,Net SIP\nFY 24-25 Q4 YTD,MARS AUM FY 24-25 Q4 YTD,MARS Net Sales FY 24-25 Q4 YTD,PMS AUM FY 24-25 Q4 YTD,Net Sales FY 24-25 Q4 YTD,Clients Acquired FY 24-25 Q4 YTD,Live Accounts (Non-D) FY 24-25 Q4 YTD,Saturday School (YTD) FY 24-25 Q4 YTD,Investment\nSaturday School (YTD) FY 24-25 Q4 YTD,Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD,Total Group FY 24-25 Q4 YTD,Group Covered FY 24-25 Q4 YTD,Total Reviews\nFY 24-25 Q4 YTD,Non-NJ AUM\nFY 24-25 Q4 YTD,Amount FY 24-25 Q4 YTD,Flexicap Target\nFY 24-25 Q4 YTD,Flexicap Ach\nFY 24-25 Q4 YTD,AMC NS Target\nFY 24-25 Q4 YTD,AMC NS Ach\nFY 24-25 Q4 YTD,Target Qty (FY 24-25 Q4 YTD)\n(in Cr),Order Qty\n(FY 24-25 Q4 YTD)\n(in Cr),MARS TO EQ AUM
71299,PRASHANT ANANTRAI KAKKAD,AVINASH SINGH,AKHILESH SINGH,AYUSH JAIN,99980,YOGESH SHARMA,D,AVINASH SINGH,SR. REGIONAL MANAGER,NEW DELHI - NSP,48,48,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,0.03,,0.0,0.0,0,0.01,MUTUAL FUND-Terminated,2025,391152.34,391152.34,0.0,-19947.622,-19947.622,0.0,0,0.0,0,0,0,3,0,0,0,1,0,0,0,0,0,0.0,0,0.0,0.0,0,0.0
71300,SARFARAZ ABDULLA PATEL,SWAPNIL ADMANE,SHRIKANT M. GABALE,PRADEEP S. PATIL,99981,VAISHALI DATTATRAYA JADHAV,NON D,ANIKET GAUTAM YATAM,UNIT MANAGER,SANGLI,48,48,,,,,,,,,,,,,,,,,,0.5,,,,,,,,,,59.15,18.34,72.41,105.88,0,54.02,MUTUAL FUND-Active,2025,19220045.69,19186516.82,421379.93,7067803.607,7058804.056,146691.98,1883442,912824.91,0,0,54,119,6,6,0,2,1,3,64380,0,14756,7198.58,676736,274515.474,2417.113161,2000,0.098165
71301,HIMANSHU BHARAT MEHTA,,SAMEER DHOTARKAR,PRASHANT M. DESHPANDE,99982,TRIBHOVAN RAJENDRA GUPTA,D,PRASHANT M. DESHPANDE,BRANCH MANAGER,Mumbai - Vashi,48,48,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,0.0,,0.0,0.0,0,0.0,MUTUAL FUND-Terminated,2025,468734.92,468734.92,0.0,0.0,0.0,0.0,468735,0.0,0,0,0,1,0,0,0,1,0,0,0,0,0,0.0,0,0.0,0.0,0,1.0
71302,PRASHANT ANANTRAI KAKKAD,Manish Ashok Ambani,KETAN PANDIT,JIGNESH B SHAH,99983,SHAH TEJAS DHIRAJBHAI,D,JIGNESH B SHAH,BRANCH MANAGER,Ahmedabad - Navrangpura,48,48,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,5.53,,22.14,17.65,0,13.86,MUTUAL FUND-Active,2025,2876431.15,2876431.15,175791.42,1707516.38,1707516.38,87295.75,0,0.0,0,0,9,23,0,0,0,1,0,0,0,0,0,0.0,0,0.0,0.0,0,0.0
71303,VINAY DUDABHAI BARAIYA,,MEHUL TRIVEDI,DHARMESHBHAI BHATT,99993,Prajapati Vaibhavkumar Sureshbhai,D,JUVARDA DHARMIK NITINBHAI,UNIT MANAGER,Baroda - Alkapuri,44,44,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,1.14,0.0,-2.4,27.45,0,3.63,MUTUAL FUND-Active,2025,5559685.79,5559685.79,50997.52,984315.624,984315.624,18999.09,0,0.0,0,0,9,37,0,0,0,5,0,0,0,0,0,0.0,0,0.0,0.0,0,0.0


In [36]:
fy24_MIS.head(3)

Unnamed: 0,ZM,SRM,RM,BM,Broker Code,Partner Name,Category,Doer Name,Doer Type,Center,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement,Status,FY_Year
0,KOUSHIK GHOSH,,KAMLESH KUMAR,,33054,BIPLAB ROY,NON D,Santanu Deb,Fundz Express,AGARTALA,3,3,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,0,,,,,,,,,0,,,,,,,0.0,,0.0,12.5,,2.5,MUTUAL FUND-Active,2024
1,KOUSHIK GHOSH,,KAMLESH KUMAR,,88928,Santanu Deb,NON D,Santanu Deb,Fundz Express,AGARTALA,55,8,7090659.76,7090659.76,228989.7,1935230.657,1935230.666,95995.23,1999435,1050000,0,0,16,35,,,,,,,,,0,,,,,,,54.58,0.0,119.85,64.58,0.0,70.7,MUTUAL FUND-Active,2024
2,KOUSHIK GHOSH,,KAMLESH KUMAR,,33434,TAPAS RUDRA PAL,NON D,Santanu Deb,Fundz Express,AGARTALA,3,3,2114389.27,2076409.88,108495.24,2082466.446,2045363.633,108495.24,0,0,0,0,30,27,,,,,,,,,0,,,,,,,233.37,,350.68,362.5,0.0,275.01,MUTUAL FUND-Active,2024


In [38]:
roww = SC_totals[SC_totals['Partner Name'] == 'SUBRATA MAITY']

In [39]:
roww.head()

Unnamed: 0,Sr No,Broker Code,Partner Name,Doer Name,Doer Type,Center,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,FY_Year
7,8,13226,SUBRATA MAITY,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,5280000,1513270.5,28.66,,,,110700,22723.69,20.53,48,27,56.25,1450.56,0,0,31999.13,6.67,26.88,2023
6790,6791,13226,SUBRATA MAITY,POOJA K. PAREKH,UNIT MANAGER,E - SURAT,4212000,25180739.61,597.83,,,,97200,102370.0,105.32,48,36,75.0,14011.32,0,0,90996.21,33.7,216.57,2024
8506,8507,13226,SUBRATA MAITY,SHREYAS D. BARBHAYA,UNIT MANAGER,E - SURAT,5103000,82841140.61,1623.38,50400.0,0.0,0.0,119070,287548.28,241.5,51,94,184.31,91382.76,0,0,271987.37,49.97,483.79,2025


In [40]:
merged_df = pd.merge(
    BN_total,
    SC_totals,
    how='left',
    left_on=['Partner Code', 'Partner Name', 'FY_Year'],
    right_on=['Broker Code', 'Partner Name', 'FY_Year']
)


In [41]:
merged_df.head()

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center_x,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name,Doer Type,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82
1,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04
2,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023,3,24695,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81
3,4,24040,GOURAB PURKAIT,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,26998.7,1.8,26998.7,0,0,0,0,0,0,0,0,0,0,33750,10499.49,31.11,10499.49,0.0,2023,4,24040,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,26998.7,1.8,,,,33750,10499.49,31.11,20,3.0,15.0,3.11,0,0.0,6999.66,2.33,14.59
4,5,25776,RAJ KISHORE BARIK,24 SOUTH PARGANA,NON D,SUBRATA MAITY,600000,496499.92,82.75,1499.92,0,0,0,0,0,0,0,0,0,495000,13500,1499.92,11.11,1499.92,0.0,2023,5,25776,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,600000,496499.92,82.75,,,,13500,1499.92,11.11,8,3.0,37.5,0.0,0,,0.0,0.0,40.82


In [42]:
merged_df = merged_df[merged_df['FY_Year'].isin(['2023','2024','2025'])]

In [46]:
merged_df['Partner Code'] = merged_df['Partner Code'].astype(str).str.strip()
merged_df['FY_Year'] = merged_df['FY_Year'].astype(str).str.strip()

In [138]:
merged_df['Broker Code'] = merged_df['FY_Year'].astype(str).str.strip()

In [196]:
row_data = merged_df[merged_df['Partner Name'] == 'VA Financial Products Distribution LLP']

In [197]:
row_data

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center_x,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name,Doer Type,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,Total_Performance
46560,23439,1461,VA Financial Products Distribution LLP,Surat (ALT CHANNEL),NON D,VA Financial Products Distribution LLP,13608000,2731750202.19,20074.59,1639518330.61,-3149360.55,-1483279.61,0,0,1244500,0,0,331245797.75,0,753660657.92,226800,26413006.39,11645.95,43983185.69,17570179.3,2024,23439,1461,VA Financial Products Distribution LLP,Agency Manager,Surat (ALT CHANNEL),13608000,2731750202.19,20074.59,24208768.66,37281966.15,154.0,226800,26413006.39,11645.95,48,925,1927.08,27162774.44,1442000,5.31,87078635.02,23036.68,7836.8,150738862.17
79784,30952,1461,VA Financial Products Distribution LLP,SURAT - VESU,NON D,VA Financial Products Distribution LLP,6032880,3269519068.1,54195.0,1778494517.64,2476858.45,76555121.12,0,0,742000,0,0,127494722.75,122970636,1160785212.14,144020,15419066.05,10706.2,39401031.5,23981965.45,2025,30952,1461,VA Financial Products Distribution LLP,Agency Manager,SURAT - VESU,6032880,3269519068.1,54195.0,28162590.59,47158512.7,167.45,144020,15419066.05,10706.2,51,1114,2184.31,41418053.19,13021389,31.44,110284108.44,15195.67,14423.51,232170465.818


In [198]:
merged_df.duplicated().sum()

0

In [52]:
merged_df = merged_df.drop(columns=['Center_y', 'FY_Year_y', 'Sr No'])
merged_df = merged_df.rename(columns={'Center_x':'Center', 'FY_Year_x':'FY_Year', 'Sr No.': 'Sr No.'})


KeyError: "['FY_Year_y'] not found in axis"

In [59]:
merged_df = merged_df.drop(columns=['Sr No'])
merged_df = merged_df.rename(columns={'Sr No.': 'Sr No.'})


In [85]:
merged_df.columns

Index(['Sr No.', 'Partner Code', 'Partner Name', 'Center_x', 'Category',
       'Relationship Handler', 'Investment Net Sales Target',
       'Investment Net Sales Achievement',
       'Investment Net Sales % Achievement', 'Equity Sales', 'MIP Sales',
       'Gold Sales', 'Sales in Physical Assets ', 'Sales in Direct Equity ',
       'FD + Bond (Primary Market) Sales', 'Secondary Market Bond Sales',
       'Net Sales Through Realty', 'Net NJ PMS Sales', 'Net Non-NJ PMS Sales',
       'Net Sales through MARS', 'SIP Sales Target', 'SIP Sales Achievement',
       'SIP Sales % Achievement', 'Fresh Gross SIP Sales',
       'SIP Closure / Termination', 'FY_Year', 'Sr No', 'Broker Code',
       'Doer Name', 'Doer Type', 'Center_y', 'Equity Net Sales_Target',
       'Equity Net Sales_Achievement', 'Equity Net Sales_% Achievement',
       'Insurance_Target', 'Insurance_Achievement', 'Insurance_% Achievement',
       'SIP Sales_Target', 'SIP Sales_Achievement', 'SIP Sales_% Achievement',
       

### Consolidated Score Level (for Employees /Partners)

In [61]:
import pandas as pd
import concurrent.futures
import google.generativeai as genai

gemini_api_key = google_api_key
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

kpi_percent_cols = [
    'Equity Net Sales_% Achievement',
    'SIP Sales_% Achievement',
    'Insurance_% Achievement',
    'Client Acquisition_% Achievement',
    'LAS_% Achievement'
]

kpi_numeric_cols = [
    'Net Sales through MARS',
    'Net NJ PMS Sales',
    'SIP to Net Sales Ratio_Ratio'
]

all_kpis = kpi_percent_cols + kpi_numeric_cols

def normalize_kpis(df, kpis):
    df_norm = df.copy()
    for col in kpis:
        max_val = df_norm[col].max()
        if max_val != 0:
            df_norm[col + '_Normalized'] = df_norm[col] / max_val * 100
        else:
            df_norm[col + '_Normalized'] = 0
    return df_norm

def compute_consolidated_score(df, kpis):
    norm_cols = [col + '_Normalized' for col in kpis]
    df_scores = df.groupby('Partner Name')[norm_cols].mean().reset_index()
    df_scores['Consolidated Score'] = df_scores[norm_cols].mean(axis=1)
    return df_scores

def get_top_partners(df_scores, top_n=5):
    return df_scores.sort_values(by='Consolidated Score', ascending=False).head(top_n)

def generate_consolidated_insights(df_top):
    insights_text = ""

    def generate_for_partner(row):
        partner = row['Partner Name']
        cons_score = round(row['Consolidated Score'], 2)
        kpi_scores = {kpi: round(row[kpi + '_Normalized'], 2) for kpi in all_kpis}

        prompt = f"""
You are an expert business analyst. Analyze the performance of partner '{partner}'.
Consolidated Score: {cons_score}%
Individual KPI Scores: {kpi_scores}

Generate descriptive insights:
- How the partner is performing overall
- Strengths and weaknesses based on KPI scores

Generate prescriptive insights:
- Recommended actions to improve performance
- Areas to focus on for growth
"""
        try:
            resp = model.generate_content(
                prompt,
                generation_config={
                    "temperature": 0.7,
                    "top_p": 0.9,
                    "top_k": 40,
                    "max_output_tokens": 1024
                }
            )
            return f"\nPartner: {partner}\n{resp.text}\n{'-'*80}\n"
        except Exception as e:
            return f"Error generating insights for {partner}: {e}\n"

    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(generate_for_partner, [row for _, row in df_top.iterrows()]))

    insights_text = "\n".join(results)
    return insights_text

def main(merged_df):
 
    df_norm = normalize_kpis(merged_df, all_kpis)
    df_scores = compute_consolidated_score(df_norm, all_kpis)
    df_top = get_top_partners(df_scores, top_n=5)
    partner_insights = generate_consolidated_insights(df_top)
    return partner_insights

partner_insights = main(merged_df)  
print(partner_insights)



Partner: VA Financial Products Distribution LLP
Okay, let's analyze the performance of VA Financial Products Distribution LLP.

**Descriptive Insights:**

*   **Overall Performance:** With a consolidated score of 54.7%, VA Financial Products Distribution LLP is underperforming. While some areas show promise, the overall score suggests significant room for improvement. They are currently not meeting their overall targets.

*   **Strengths:**
    *   **SIP Sales:** The partner is performing well in SIP sales, achieving 80.07% of their target. This indicates a good understanding of SIP products and effective sales strategies in this area.
    *   **Net Sales through MARS:** A strong performance here at 82.46% suggests efficient utilization of the MARS platform for generating sales.
    *   **SIP to Net Sales Ratio:** A ratio of 82.98% indicates that a significant portion of their net sales is coming from SIPs. This could be seen as a strength in terms of recurring revenue, but also a pot

## Parameter-Wise Scores

In [86]:
import pandas as pd
import concurrent.futures
import google.generativeai as genai

parameter_kpis = [
    'Equity Net Sales_% Achievement',
    'SIP Sales_% Achievement',
    'Client Acquisition_% Achievement',
    'Net Sales through MARS',
    'LAS_% Achievement'
]

gemini_api_key = google_api_key  
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

def generate_parameter_insights(df, kpi_list, top_n=5):
    insights_text = ""
    
    df['Total_Performance'] = df[kpi_list].mean(axis=1)
    top_partners = df.sort_values(by='Total_Performance', ascending=False).head(top_n)
    
    for _, row in top_partners.iterrows():
        partner = row['Partner Name']
        insights_text += f"\nPartner: {partner}\n"
        
        for kpi in kpi_list:
            value = round(row[kpi], 2)
            prompt = f"""
You are an expert business analyst. Analyze the KPI '{kpi}' for partner '{partner}'.
KPI Achievement: {value}%

Generate descriptive insights:
- How the partner is performing on this KPI
- Strengths or weaknesses

Generate prescriptive insights:
- Recommended actions to improve this KPI
- Priority areas for focus
"""
            try:
                resp = model.generate_content(
                    prompt,
                    generation_config={
                        "temperature": 0.7,
                        "top_p": 0.9,
                        "top_k": 40,
                        "max_output_tokens": 1024
                    }
                )
                insights_text += f"{kpi} Insights:\n{resp.text}\n" + "-"*60 + "\n"
            except Exception as e:
                insights_text += f"Error generating insights for {partner} - {kpi}: {e}\n"
    
    return insights_text

parameter_insights = generate_parameter_insights(merged_df, parameter_kpis, top_n=5)
print(parameter_insights)



Partner: VA Financial Products Distribution LLP
Equity Net Sales_% Achievement Insights:
Okay, let's analyze the 'Equity Net Sales_% Achievement' KPI for VA Financial Products Distribution LLP, given their achievement of 54195%.

**Descriptive Insights:**

*   **Performance:** VA Financial Products Distribution LLP is performing exceptionally well on this KPI. An achievement of 54195% indicates they have significantly exceeded their target for Equity Net Sales. This is a massive overachievement.
*   **Strengths:**
    *   **Strong Sales Engine:** They clearly have a very effective sales strategy and execution. Their team is likely highly motivated and skilled in selling Equity products.
    *   **Market Penetration:** They have likely penetrated the market very effectively, reaching a large customer base.
    *   **Product Suitability:** The equity products they are offering are likely well-suited to the current market demand and customer preferences.
    *   **Excellent Relationship 

In [54]:
merged_df.columns

Index(['Sr No.', 'Partner Code', 'Partner Name', 'Center_x', 'Category',
       'Relationship Handler', 'Investment Net Sales Target',
       'Investment Net Sales Achievement',
       'Investment Net Sales % Achievement', 'Equity Sales', 'MIP Sales',
       'Gold Sales', 'Sales in Physical Assets ', 'Sales in Direct Equity ',
       'FD + Bond (Primary Market) Sales', 'Secondary Market Bond Sales',
       'Net Sales Through Realty', 'Net NJ PMS Sales', 'Net Non-NJ PMS Sales',
       'Net Sales through MARS', 'SIP Sales Target', 'SIP Sales Achievement',
       'SIP Sales % Achievement', 'Fresh Gross SIP Sales',
       'SIP Closure / Termination', 'FY_Year', 'Sr No', 'Broker Code',
       'Doer Name', 'Doer Type', 'Center_y', 'Equity Net Sales_Target',
       'Equity Net Sales_Achievement', 'Equity Net Sales_% Achievement',
       'Insurance_Target', 'Insurance_Achievement', 'Insurance_% Achievement',
       'SIP Sales_Target', 'SIP Sales_Achievement', 'SIP Sales_% Achievement',
       

In [64]:
import pandas as pd
import concurrent.futures
import google.generativeai as genai

# Mapping of KPIs to their Target, Achievement, % Achievement columns
parameter_kpis_info = {
    'Equity Net Sales': {
        'Target': 'Equity Net Sales_Target',
        'Achievement': 'Equity Net Sales_Achievement',
        '% Achievement': 'Equity Net Sales_% Achievement'
    },
    'SIP Sales': {
        'Target': 'SIP Sales_Target',
        'Achievement': 'SIP Sales_Achievement',
        '% Achievement': 'SIP Sales_% Achievement'
    },
    'Client Acquisition': {
        'Target': 'Client Acquisition_Target',
        'Achievement': 'Client Acquisition_Achievement',
        '% Achievement': 'Client Acquisition_% Achievement'
    },
    'LAS': {
        'Target': 'LAS_Target',
        'Achievement': 'LAS_Achievement',
        '% Achievement': 'LAS_% Achievement'
    }
}

# Configure Gemini API
gemini_api_key = google_api_key
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

def generate_parameter_insights(df, kpi_info, top_n=5, fill_missing_with_zero=True):
    insights_text = ""
    
    # Convert % Achievement columns to numeric
    for kpi, cols in kpi_info.items():
        df[cols['% Achievement']] = pd.to_numeric(df[cols['% Achievement']], errors='coerce')
    
    # Calculate Total_Performance for top N ranking
    all_percent_cols = [cols['% Achievement'] for cols in kpi_info.values()]
    if fill_missing_with_zero:
        df['Total_Performance'] = df[all_percent_cols].fillna(0).mean(axis=1)
    else:
        df['Total_Performance'] = df[all_percent_cols].mean(axis=1)
    
    # Select top N partners
    top_partners = df.sort_values(by='Total_Performance', ascending=False).head(top_n)
    
    # Generate Parameter-Wise insights
    for _, row in top_partners.iterrows():
        partner = row['Partner Name']
        insights_text += f"\nPartner: {partner}\n" + "-"*80 + "\n"
        
        for kpi, cols in kpi_info.items():
            target = row.get(cols['Target'], 'Data missing')
            achievement = row.get(cols['Achievement'], 'Data missing')
            pct_ach = row.get(cols['% Achievement'], 'Data missing')
            
            value_text = f"Target: {target}, Achievement: {achievement}, % Achievement: {pct_ach}"
            
            prompt = f"""
You are an expert business analyst. Analyze the KPI '{kpi}' for partner '{partner}'.
KPI Details: {value_text}

Generate descriptive insights:
- How the partner is performing on this KPI
- Strengths or weaknesses

Generate prescriptive insights:
- Recommended actions to improve this KPI
- Priority areas for focus
"""
            try:
                resp = model.generate_content(
                    prompt,
                    generation_config={
                        "temperature": 0.7,
                        "top_p": 0.9,
                        "top_k": 40,
                        "max_output_tokens": 1024
                    }
                )
                insights_text += f"{kpi} Insights:\n{resp.text}\n" + "-"*80 + "\n"
            except Exception as e:
                insights_text += f"Error generating insights for {partner} - {kpi}: {e}\n"
    
    return insights_text

# Run the function on your merged DataFrame
parameter_insights = generate_parameter_insights(merged_df, parameter_kpis_info, top_n=5)
print(parameter_insights)



Partner: VA Financial Products Distribution LLP
--------------------------------------------------------------------------------
Equity Net Sales Insights:
Okay, let's analyze the KPI "Equity Net Sales" for partner "VA Financial Products Distribution LLP" based on the provided data.

**Descriptive Insights:**

*   **Performance:** VA Financial Products Distribution LLP is performing exceptionally well against the Equity Net Sales target. They have achieved a sales volume of 3,269,519,068.1, significantly exceeding the target of 6,032,880.
*   **% Achievement:** The achievement percentage of 54,195.0% is extraordinarily high. This indicates a massive over-performance relative to the initial target.
*   **Strengths:**
    *   **Exceptional Sales Performance:**  VA Financial Products Distribution LLP demonstrates a strong ability to generate equity net sales. They are clearly a highly effective distribution partner.
    *   **Strong Market Penetration:** The high sales volume suggests th

In [52]:
merged_df[['Partner Name', 'Equity Net Sales_% Achievement']].sort_values(
    by='Equity Net Sales_% Achievement', ascending=False
).head(10)


Unnamed: 0,Partner Name,Equity Net Sales_% Achievement
79784,VA Financial Products Distribution LLP,54195.0
53017,ANJALI P MANOHAR,25435.78
46560,VA Financial Products Distribution LLP,20074.59
80029,PRATIBHA SINGHVI,15747.64
36207,PAMPA DEY,12381.2
28330,KHUSHI KHUSHAL GOLANI,9809.49
13570,MARZEE MAIDHYOMAH KERAWALA,8799.77
53382,SHAH PRATESH JAYANTILAL HUF,7737.01
56206,HEENA VISHAL DESAI,7411.53
59420,FUNDZCLUB INDIA,6660.39


### Combine

In [117]:
import pandas as pd
import concurrent.futures
import google.generativeai as genai

gemini_api_key = google_api_key  
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

kpi_percent_cols = [
    'Equity Net Sales_% Achievement',
    'SIP Sales_% Achievement',
    'Insurance_% Achievement',
    'Client Acquisition_% Achievement',
    'LAS_% Achievement'
]

kpi_numeric_cols = [
    'Net Sales through MARS',
    'Net NJ PMS Sales',
    'SIP to Net Sales Ratio_Ratio'
]

all_kpis = kpi_percent_cols + kpi_numeric_cols

def normalize_kpis(df, kpis):
    df_norm = df.copy()
    for col in kpis:
        max_val = df_norm[col].max()
        if max_val != 0:
            df_norm[col + '_Normalized'] = df_norm[col] / max_val * 100
        else:
            df_norm[col + '_Normalized'] = 0
    return df_norm

def compute_consolidated_score(df, kpis):
    norm_cols = [col + '_Normalized' for col in kpis]
    df_scores = df.groupby('Partner Name')[norm_cols].mean().reset_index()
    df_scores['Consolidated Score'] = df_scores[norm_cols].mean(axis=1)
    return df_scores

def get_top_partners(df_scores, top_n=5):
    return df_scores.sort_values(by='Consolidated Score', ascending=False).head(top_n)

def generate_combined_insights(df_top):
    insights_text = ""

    def generate_for_partner(row):
        partner = row['Partner Name']
        cons_score = round(row['Consolidated Score'], 2)
        kpi_scores = {kpi: round(row[kpi + '_Normalized'], 2) for kpi in all_kpis}

        prompt = f"""
You are an expert business analyst. Analyze the performance of partner '{partner}'.
Consolidated Score: {cons_score}%
Individual KPI Scores: {kpi_scores}

Generate descriptive insights:
- How the partner is performing overall
- Strengths and weaknesses based on KPI scores

Generate prescriptive insights:
- Recommended actions to improve performance
- Areas to focus on for growth

2. KPI Insights:
You are an expert business analyst. Analyze the KPI '{kpi}' for partner '{partner}'.
For each KPI (Equity Net Sales_% Achievement, SIP Sales_% Achievement, Client Acquisition_% Achievement, LAS_% Achievement, Net Sales through MARS):

Generate descriptive insights:
- How the partner is performing on this KPI
- Strengths or weaknesses

Generate prescriptive insights:
- Recommended actions to improve this KPI
- Priority areas for focus
"""
        try:
            resp = model.generate_content(
                prompt,
                generation_config={
                    "temperature": 0.7,
                    "top_p": 0.9,
                    "top_k": 40,
                    "max_output_tokens": 2048  # enough for full insights
                }
            )
            return f"\nPartner: {partner}\n{resp.text}\n{'-'*80}\n"
        except Exception as e:
            return f"Error generating insights for {partner}: {e}\n"

    # Use concurrent execution for faster generation
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(generate_for_partner, [row for _, row in df_top.iterrows()]))

    insights_text = "\n".join(results)
    return insights_text

def main(merged_df, top_n=5):
    df_norm = normalize_kpis(merged_df, all_kpis)
    df_scores = compute_consolidated_score(df_norm, all_kpis)
    df_top = get_top_partners(df_scores, top_n=top_n)
    partner_insights = generate_combined_insights(df_top)
    return partner_insights

partner_insights = main(merged_df)  
print(partner_insights)



Partner: VA Financial Products Distribution LLP
Okay, let's break down the performance of VA Financial Products Distribution LLP and provide actionable insights.

**1. Overall Partner Performance Analysis:**

**Descriptive Insights:**

*   **Overall Performance:** VA Financial Products Distribution LLP has a consolidated score of 54.7%, indicating **underperformance**. This score suggests that the partner is not consistently meeting its targets across various key performance indicators.
*   **Strengths:** The partner demonstrates strong performance in **SIP Sales (% Achievement at 80.07%), Net Sales through MARS (82.46%), SIP to Net Sales Ratio (82.98) and Equity Net Sales (% Achievement at 68.52%)**. This suggests a good understanding of SIP products, effective utilization of the MARS platform, and an ability to convert sales into SIP investments.
*   **Weaknesses:**  Significant weaknesses exist in **Insurance (% Achievement at a dismal 0.18%), LAS (% Achievement at 9.19%), and Clie

In [199]:
merged_df.head(2)

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center_x,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name,Doer Type,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,Total_Performance
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0,4999.76,1.67,17.82,5954.006
1,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0,111994.42,37.33,41.04,290762.85


In [112]:
import pandas as pd
import concurrent.futures
import google.generativeai as genai

gemini_api_key = google_api_key  
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

kpi_percent_cols = [
    'Equity Net Sales_% Achievement',
    'SIP Sales_% Achievement',
    'Insurance_% Achievement',
    'Client Acquisition_% Achievement',
    'LAS_% Achievement'
]

kpi_numeric_cols = [
    'Net Sales through MARS',
    'Net NJ PMS Sales',
    'SIP to Net Sales Ratio_Ratio'
]

all_kpis = kpi_percent_cols + kpi_numeric_cols

def normalize_kpis(df, kpis):
    df_norm = df.copy()
    for col in kpis:
        max_val = df_norm[col].max()
        if max_val != 0:
            df_norm[col + '_Normalized'] = df_norm[col] / max_val * 100
        else:
            df_norm[col + '_Normalized'] = 0
    return df_norm

def compute_consolidated_score(df, kpis):
    norm_cols = [col + '_Normalized' for col in kpis]
    df_scores = df.groupby('Partner Name')[norm_cols].mean().reset_index()
    df_scores['Consolidated Score'] = df_scores[norm_cols].mean(axis=1)
    return df_scores

def get_top_partners(df_scores, top_n=5):
    return df_scores.sort_values(by='Consolidated Score', ascending=False).head(top_n)

def generate_combined_insights(df_top):
    insights_text = ""

    def generate_for_partner(row):
        partner = row['Partner Name']
        cons_score = round(row['Consolidated Score'], 2)
        kpi_scores = {kpi: round(row[kpi + '_Normalized'], 2) for kpi in all_kpis}

        prompt = f"""
You are an expert business analyst. Analyze partner '{partner}'.

**Consolidated Score:** {cons_score}%
**KPI Achievements:** {kpi_scores}

Generate insights in one response:

1. **Consolidated Score Insights:** 
   - Overall performance
   - Strengths and weaknesses
   - Recommended actions

2. **KPI-wise Insights:** 
   For each KPI (Equity Net Sales_% Achievement, SIP Sales_% Achievement, Client Acquisition_% Achievement, LAS_% Achievement, Net Sales through MARS):
   - Descriptive insights (performance, strengths, weaknesses)
   - Prescriptive insights (actions, priority areas)

Format the output clearly with headings, bullet points, and separators for each partner.

"""
        try:
            resp = model.generate_content(
                prompt,
                generation_config={
                    "temperature": 0.7,
                    "top_p": 0.9,
                    "top_k": 40,
                    "max_output_tokens": 2048  # enough for full insights
                }
            )
            return f"\nPartner: {partner}\n{resp.text}\n{'-'*80}\n"
        except Exception as e:
            return f"Error generating insights for {partner}: {e}\n"

    # Use concurrent execution for faster generation
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(generate_for_partner, [row for _, row in df_top.iterrows()]))

    insights_text = "\n".join(results)
    return insights_text

def main(merged_df, top_n=5):
    df_norm = normalize_kpis(merged_df, all_kpis)
    df_scores = compute_consolidated_score(df_norm, all_kpis)
    df_top = get_top_partners(df_scores, top_n=top_n)
    partner_insights = generate_combined_insights(df_top)
    return partner_insights

partner_insights = main(merged_df)  
print(partner_insights)



Partner: VA Financial Products Distribution LLP
## Partner Analysis: VA Financial Products Distribution LLP

**Consolidated Score:** 54.7%

**1. Consolidated Score Insights:**

*   **Overall Performance:** VA Financial Products Distribution LLP's consolidated score of 54.7% indicates **underperformance**. The partner is not meeting overall business objectives and requires immediate attention and improvement strategies.
*   **Strengths:** There are some positive indicators, particularly in SIP Sales and Net Sales through MARS, suggesting competence in certain product lines and platform utilization.
*   **Weaknesses:** The low consolidated score is driven by significant underperformance in Insurance, Client Acquisition, and LAS. This suggests issues with diversification, client outreach, and potentially, risk management or product suitability.
*   **Recommended Actions:**
    *   **Prioritize Improvement:** Focus on KPIs with the lowest achievement percentages, particularly Insurance, C

In [51]:
merged_df['Broker Code'] = merged_df['Broker Code'].astype(str)
MIS_total['Broker Code'] = MIS_total['Broker Code'].astype(str)

In [52]:
merged_df.columns

Index(['Sr No.', 'Partner Code', 'Partner Name', 'Center_x', 'Category',
       'Relationship Handler', 'Investment Net Sales Target',
       'Investment Net Sales Achievement',
       'Investment Net Sales % Achievement', 'Equity Sales', 'MIP Sales',
       'Gold Sales', 'Sales in Physical Assets ', 'Sales in Direct Equity ',
       'FD + Bond (Primary Market) Sales', 'Secondary Market Bond Sales',
       'Net Sales Through Realty', 'Net NJ PMS Sales', 'Net Non-NJ PMS Sales',
       'Net Sales through MARS', 'SIP Sales Target', 'SIP Sales Achievement',
       'SIP Sales % Achievement', 'Fresh Gross SIP Sales',
       'SIP Closure / Termination', 'FY_Year', 'Sr No', 'Broker Code',
       'Doer Name', 'Doer Type', 'Center_y', 'Equity Net Sales_Target',
       'Equity Net Sales_Achievement', 'Equity Net Sales_% Achievement',
       'Insurance_Target', 'Insurance_Achievement', 'Insurance_% Achievement',
       'SIP Sales_Target', 'SIP Sales_Achievement', 'SIP Sales_% Achievement',
       

In [68]:
MIS_total.head()

Unnamed: 0,ZM,SRM,RM,BM,Broker Code,Partner Name,Category,Doer Name,Doer Type,Center,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement,Status,FY_Year,Total AUM FY 24-25 Q4 YTD,Equity AUM FY 24-25 Q4 YTD,LIVE SIP FY 24-25 Q4 YTD,Total Net Sales\nFY 24-25 Q4 YTD,Equity Net Sales\nFY 24-25 Q4 YTD,Net SIP\nFY 24-25 Q4 YTD,MARS AUM FY 24-25 Q4 YTD,MARS Net Sales FY 24-25 Q4 YTD,PMS AUM FY 24-25 Q4 YTD,Net Sales FY 24-25 Q4 YTD,Clients Acquired FY 24-25 Q4 YTD,Live Accounts (Non-D) FY 24-25 Q4 YTD,Saturday School (YTD) FY 24-25 Q4 YTD,Investment\nSaturday School (YTD) FY 24-25 Q4 YTD,Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD,Total Group FY 24-25 Q4 YTD,Group Covered FY 24-25 Q4 YTD,Total Reviews\nFY 24-25 Q4 YTD,Non-NJ AUM\nFY 24-25 Q4 YTD,Amount FY 24-25 Q4 YTD,Flexicap Target\nFY 24-25 Q4 YTD,Flexicap Ach\nFY 24-25 Q4 YTD,AMC NS Target\nFY 24-25 Q4 YTD,AMC NS Ach\nFY 24-25 Q4 YTD,Target Qty (FY 24-25 Q4 YTD)\n(in Cr),Order Qty\n(FY 24-25 Q4 YTD)\n(in Cr),MARS TO EQ AUM
0,KOUSHIK GHOSH,,KAMLESH KUMAR,,33054,BIPLAB ROY,NON D,Santanu Deb,Fundz Express,AGARTALA,3,3,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,0,,,,,,,,,0,,,,,,,0.0,,0.0,12.5,,2.5,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,KOUSHIK GHOSH,,KAMLESH KUMAR,,88928,Santanu Deb,NON D,Santanu Deb,Fundz Express,AGARTALA,55,8,7090659.76,7090659.76,228989.7,1935230.657,1935230.666,95995.23,1999435,1050000,0,0,16,35,,,,,,,,,0,,,,,,,54.58,0.0,119.85,64.58,0.0,70.7,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,KOUSHIK GHOSH,,KAMLESH KUMAR,,33434,TAPAS RUDRA PAL,NON D,Santanu Deb,Fundz Express,AGARTALA,3,3,2114389.27,2076409.88,108495.24,2082466.446,2045363.633,108495.24,0,0,0,0,30,27,,,,,,,,,0,,,,,,,233.37,,350.68,362.5,0.0,275.01,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,PRASHANT ANANTRAI KAKKAD,BHAVESH B. JOSHI,ABHAY KUMAR VISHWAKARMA,AALEY NABI,26185,AMIT GANGWAR,NON D,SAURABH AGRAWAL,UNIT MANAGER,AGRA,13,13,885945.96,885945.96,67096.73,567009.9,567009.902,60096.98,0,0,0,0,18,23,,,,,,,,,0,,,,,,,19.75,0.0,58.35,62.5,0.0,30.83,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,PRASHANT ANANTRAI KAKKAD,BHAVESH B. JOSHI,ABHAY KUMAR VISHWAKARMA,AALEY NABI,21754,ANJANA VARSHNEY,NON D,SAURABH AGRAWAL,UNIT MANAGER,AGRA,19,13,325872.43,325872.43,9999.5,189993.304,189993.304,-8999.55,0,0,0,0,0,2,,,,,,,,,0,,,,,,,6.98,,-9.8,4.17,0.0,-1.22,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [53]:
MIS_total.head(2)

Unnamed: 0,ZM,SRM,RM,BM,Broker Code,Partner Name,Category,Doer Name,Doer Type,Center,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement,Status,FY_Year,Total AUM FY 24-25 Q4 YTD,Equity AUM FY 24-25 Q4 YTD,LIVE SIP FY 24-25 Q4 YTD,Total Net Sales\nFY 24-25 Q4 YTD,Equity Net Sales\nFY 24-25 Q4 YTD,Net SIP\nFY 24-25 Q4 YTD,MARS AUM FY 24-25 Q4 YTD,MARS Net Sales FY 24-25 Q4 YTD,PMS AUM FY 24-25 Q4 YTD,Net Sales FY 24-25 Q4 YTD,Clients Acquired FY 24-25 Q4 YTD,Live Accounts (Non-D) FY 24-25 Q4 YTD,Saturday School (YTD) FY 24-25 Q4 YTD,Investment\nSaturday School (YTD) FY 24-25 Q4 YTD,Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD,Total Group FY 24-25 Q4 YTD,Group Covered FY 24-25 Q4 YTD,Total Reviews\nFY 24-25 Q4 YTD,Non-NJ AUM\nFY 24-25 Q4 YTD,Amount FY 24-25 Q4 YTD,Flexicap Target\nFY 24-25 Q4 YTD,Flexicap Ach\nFY 24-25 Q4 YTD,AMC NS Target\nFY 24-25 Q4 YTD,AMC NS Ach\nFY 24-25 Q4 YTD,Target Qty (FY 24-25 Q4 YTD)\n(in Cr),Order Qty\n(FY 24-25 Q4 YTD)\n(in Cr),MARS TO EQ AUM
0,KOUSHIK GHOSH,,KAMLESH KUMAR,,33054,BIPLAB ROY,NON D,Santanu Deb,Fundz Express,AGARTALA,3,3,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,0,,,,,,,,,0,,,,,,,0.0,,0.0,12.5,,2.5,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,KOUSHIK GHOSH,,KAMLESH KUMAR,,88928,Santanu Deb,NON D,Santanu Deb,Fundz Express,AGARTALA,55,8,7090659.76,7090659.76,228989.7,1935230.657,1935230.666,95995.23,1999435,1050000,0,0,16,35,,,,,,,,,0,,,,,,,54.58,0.0,119.85,64.58,0.0,70.7,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,


###  NEW LOGIC 

In [70]:
merged_with_hierarchyy= pd.merge(
    merged_df,
    MIS_total[['ZM', 'SRM', 'RM', 'BM', 'Broker Code']],
    on='Broker Code',  
    how='left'
)

In [200]:
merged_with_hierarchyy.head(5)

Unnamed: 0,Sr No.,Partner Code,Partner Name_x,Center_x,Category_x,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year_x,Sr No,Broker Code,Doer Name_x,Doer Type_x,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement_x,ZM,SRM,RM,BM,Partner Name_y,Category_y,Doer Name_y,Doer Type_y,Center,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales FY 23-24 YTD,Equity Net Sales FY 23-24 YTD,Net SIP FY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment Saturday School (YTD) FY 23-24 Q4 YTD,Insurance Saturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM FY 23-24 Q4 YTD (in Cr),Total Reviews FY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD (in Cr),Flexicap Target FY 23-24 Q4 YTD (in Cr),Flexicap Ach FY 23-24 Q4 YTD (in Cr),AMC NS Target FY 23-24 Q4 YTD (in Cr),AMC NS Ach FY 23-24 Q4 YTD (in Cr),Target Qty (FY 23-24 Q4 YTD) (in Cr),Order Qty (FY 23-24 Q4 YTD) (in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement_y,Status,FY_Year_y,Total AUM FY 24-25 Q4 YTD,Equity AUM FY 24-25 Q4 YTD,LIVE SIP FY 24-25 Q4 YTD,Total Net Sales FY 24-25 Q4 YTD,Equity Net Sales FY 24-25 Q4 YTD,Net SIP FY 24-25 Q4 YTD,MARS AUM FY 24-25 Q4 YTD,MARS Net Sales FY 24-25 Q4 YTD,PMS AUM FY 24-25 Q4 YTD,Net Sales FY 24-25 Q4 YTD,Clients Acquired FY 24-25 Q4 YTD,Live Accounts (Non-D) FY 24-25 Q4 YTD,Saturday School (YTD) FY 24-25 Q4 YTD,Investment Saturday School (YTD) FY 24-25 Q4 YTD,Insurance Saturday School (YTD) FY 24-25 Q4 YTD,Total Group FY 24-25 Q4 YTD,Group Covered FY 24-25 Q4 YTD,Total Reviews FY 24-25 Q4 YTD,Non-NJ AUM FY 24-25 Q4 YTD,Amount FY 24-25 Q4 YTD,Flexicap Target FY 24-25 Q4 YTD,Flexicap Ach FY 24-25 Q4 YTD,AMC NS Target FY 24-25 Q4 YTD,AMC NS Ach FY 24-25 Q4 YTD,Target Qty (FY 24-25 Q4 YTD) (in Cr),Order Qty (FY 24-25 Q4 YTD) (in Cr),MARS TO EQ AUM,Total AUM Combined,Equity Sales as % of Total AUM,Live SIP As % of Total AUM,PMS AUM as % of Total AUM,Equity+Hyb AUM Combined,SIP Closure to Net SIP %,MARS AUM as % Total AUM,MARS Net Sales as % Equity Sales,Equity Sales as % of Total AUM 23_24,Equity Sales as % of Total AUM 24_25,Live SIP As % of Total AUM 23_24,Live SIP As % of Total AUM 24_25,PMS AUM as % of Total AUM 23_24,PMS AUM as % of Total AUM 24_25,SIP Closure to Net SIP % 23_24,SIP Closure to Net SIP % 24_25,MARS AUM as % Total AUM 23_24,MARS AUM as % Total AUM 24_25,MARS Net Sales as % Equity Sales 23_24,MARS Net Sales as % Equity Sales 24_25
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,E-Surat,,MANOJ PATEL,DENISH M. PATEL,ALOKE CHATTERJEE,D,YOGESHBHAI H. ANJARA,UNIT MANAGER,E - SURAT,16,16,1957855.85,1875223.02,9999.51,1600911.06,1524923.86,4999.75,1878565,1615000.0,0,0,4,4,,,,,,,,,100000,,,,,,,98.16,,8.17,16.67,200,59.44,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,,1957855.85,77.887443,,0.0,1875223.02,0.0,95.950118,105.906927,77.887443,0.0,0.510738,,0.0,0.0,0.0,,95.950118,0.0,105.906927,
2,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,E-Surat,,MANOJ PATEL,DENISH M. PATEL,ARINDAM CHAKRAVARTI,D,YOGESHBHAI H. ANJARA,UNIT MANAGER,E - SURAT,21,21,3626626.4,3560768.19,109994.52,1557645.832,1574721.26,57997.1,2715004,1202730.35,0,0,0,12,,,,,,,,,0,,,,,,,73.93,6.01,82.62,0.0,0,41.07,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,,3626626.4,43.421105,,0.0,3560768.19,0.0,74.863074,76.377349,43.421105,0.0,3.032971,,0.0,0.0,0.0,,74.863074,0.0,76.377349,
4,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023,3,24695,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,E-Surat,,MANOJ PATEL,DENISH M. PATEL,CHINTU KUMAR SHAW,D,YOGESHBHAI H. ANJARA,UNIT MANAGER,E - SURAT,15,15,31224.54,31224.54,8999.56,27998.67,27998.67,8999.56,0,0.0,0,0,1,2,,,,,,,,,0,,,,,,,1.88,,18.1,4.17,0,8.58,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,,31224.54,89.668799,,0.0,31224.54,0.0,0.0,0.0,89.668799,0.0,28.822074,,0.0,0.0,0.0,,0.0,0.0,0.0,
6,4,24040,GOURAB PURKAIT,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,26998.7,1.8,26998.7,0,0,0,0,0,0,0,0,0,0,33750,10499.49,31.11,10499.49,0.0,2023,4,24040,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,26998.7,1.8,,,,33750,10499.49,31.11,20,3.0,15.0,3.11,0,0.0,6999.66,2.33,14.59,E-Surat,,MANOJ PATEL,SIDDHARTH P. HATHIWALA,GOURAB PURKAIT,D,TIKLE JAY,UNIT MANAGER,E - SURAT 2,16,16,107135.97,107135.97,7010.66,71916.786,71916.786,11.0,0,0.0,0,0,0,1,,,,,,,,,0,,,,,,,3.7,,0.02,0.0,0,0.99,MUTUAL FUND-Terminated,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,,107135.97,67.126649,,0.0,107135.97,0.0,0.0,0.0,67.126649,0.0,6.543703,,0.0,0.0,0.0,,0.0,0.0,0.0,
8,5,25776,RAJ KISHORE BARIK,24 SOUTH PARGANA,NON D,SUBRATA MAITY,600000,496499.92,82.75,1499.92,0,0,0,0,0,0,0,0,0,495000,13500,1499.92,11.11,1499.92,0.0,2023,5,25776,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,600000,496499.92,82.75,,,,13500,1499.92,11.11,8,3.0,37.5,0.0,0,,0.0,0.0,40.82,E-Surat,,MANOJ PATEL,DENISH M. PATEL,RAJ KISHORE BARIK,D,YOGESHBHAI H. ANJARA,UNIT MANAGER,E - SURAT,13,13,3423264.52,3423264.52,9999.5,2413879.3,2413879.3,7999.6,0,500000.0,0,0,1,3,,,,,,,,,0,,,,,,,138.85,,16.13,4.17,0,44.32,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,,3423264.52,70.513958,,0.0,3423264.52,0.0,0.0,20.713546,70.513958,0.0,0.292104,,0.0,0.0,0.0,,0.0,0.0,20.713546,


In [61]:
merged_df['Broker Code'] = merged_df['Broker Code'].astype(str).str.strip()
MIS_total['Broker Code'] = MIS_total['Broker Code'].astype(str).str.strip()


In [73]:
merged_with_hierarchyy.head()


Unnamed: 0,Sr No.,Partner Code,Partner Name,Center_x,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Broker Code,Doer Name,Doer Type,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,Total_Performance,ZM,SRM,RM,BM
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,2023,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,17.5075,,,,
1,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2023,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,41.0625,,,,
2,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023,2023,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,4.6875,,,,
3,4,24040,GOURAB PURKAIT,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,26998.7,1.8,26998.7,0,0,0,0,0,0,0,0,0,0,33750,10499.49,31.11,10499.49,0.0,2023,2023,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,26998.7,1.8,,,,33750,10499.49,31.11,20,3.0,15.0,3.11,0,0.0,6999.66,2.33,14.59,11.9775,,,,
4,5,25776,RAJ KISHORE BARIK,24 SOUTH PARGANA,NON D,SUBRATA MAITY,600000,496499.92,82.75,1499.92,0,0,0,0,0,0,0,0,0,495000,13500,1499.92,11.11,1499.92,0.0,2023,2023,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,600000,496499.92,82.75,,,,13500,1499.92,11.11,8,3.0,37.5,0.0,0,,0.0,0.0,40.82,32.84,,,,


In [114]:
import pandas as pd
import google.generativeai as genai

# ===============================
# 🔹 Gemini API Setup
# ===============================
gemini_api_key = google_api_key
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

# ===============================
# 🔹 KPI Columns
# ===============================
kpi_percent_cols = [
    'Equity Net Sales_% Achievement',
    'SIP Sales_% Achievement',
    'Insurance_% Achievement',
    'Client Acquisition_% Achievement',
    'LAS_% Achievement'
]

kpi_numeric_cols = [
    'Net Sales through MARS',
    'Net NJ PMS Sales',
    'SIP to Net Sales Ratio_Ratio'
]

all_kpis = kpi_percent_cols + kpi_numeric_cols

# ===============================
# 🔹 Normalize KPI Values
# ===============================
def normalize_kpis(df, kpis):
    df_norm = df.copy()
    for col in kpis:
        max_val = df_norm[col].max()
        if pd.notna(max_val) and max_val != 0:
            df_norm[col + '_Normalized'] = df_norm[col] / max_val * 100
        else:
            df_norm[col + '_Normalized'] = 0
    return df_norm

# ===============================
# 🔹 Compute Consolidated Score per Group
# ===============================
def compute_consolidated_score(df, kpis, group_col):
    norm_cols = [col + '_Normalized' for col in kpis]
    df_scores = df.groupby(group_col, dropna=True)[norm_cols].mean().reset_index()
    df_scores['Consolidated Score'] = df_scores[norm_cols].mean(axis=1)
    return df_scores

# ===============================
# 🔹 Generate Batched Insights per BM
# ===============================
def generate_batched_bm_insights(df, top_bm=2):
    df_norm = normalize_kpis(df, all_kpis)
    insights_text = ""

    # Limit to top N BMs for faster run
    unique_bms = df_norm['BM'].dropna().unique()[:top_bm]
    df_norm = df_norm[df_norm['BM'].isin(unique_bms)]

    for bm, bm_group in df_norm.groupby('BM'):
        insights_text += f"\n{'='*120}\nBM LEVEL INSIGHTS: {bm}\n{'='*120}\n"

        # Compute BM consolidated score
        bm_score = compute_consolidated_score(bm_group, all_kpis, 'BM')
        bm_score_val = round(bm_score['Consolidated Score'].iloc[0], 2)

        # Prepare RM + Partner summary table
        rm_partner_summary = (
            bm_group.groupby(['RM', 'Partner Name'])[all_kpis]
            .mean()
            .reset_index()
        )
        rm_partner_summary_text = rm_partner_summary.to_string(index=False)

        # Prepare single prompt for entire BM
        prompt = f"""
You are a senior business analyst.
BM: '{bm}' with consolidated score {bm_score_val}%.
Below is performance data for RMs and Partners under this BM:

{rm_partner_summary_text}

Generate:
1️⃣ Descriptive insights for the BM, including strengths and weaknesses.
2️⃣ Key RMs and Partners driving or dragging performance.
3️⃣ Prescriptive recommendations for BM to improve overall results.
4️⃣ Insights for top performing RMs and Partners under this BM.
"""
        try:
            resp = model.generate_content(
                prompt,
                generation_config={
                    "temperature": 0.6,
                    "top_p": 0.9,
                    "max_output_tokens": 2048
                }
            )
            insights_text += f"{resp.text}\n{'='*120}\n"
        except Exception as e:
            insights_text += f"Error generating insights for BM {bm}: {e}\n"

    return insights_text

# ===============================
# 🔹 Run Batched Hierarchy Insights
# ===============================
hierarchy_insights = generate_batched_bm_insights(merged_with_hierarchyy, top_bm=2)
print(hierarchy_insights)



BM LEVEL INSIGHTS: DENISH M. PATEL
Okay, let's analyze the performance data for BM "DENISH M. PATEL" and provide actionable insights.

**1. Descriptive Insights for BM "DENISH M. PATEL"**

*   **Overall Performance:** With a consolidated score of 3.04%, DENISH M. PATEL's overall performance is concerning and indicates significant room for improvement. This score suggests that the team, on average, is underperforming against targets.
*   **Strengths (Potential):**
    *   **High Number of RMs and Partners:** The sheer volume of RMs and Partners under this BM suggests a potentially large network and reach. This can be a strength if properly leveraged.
    *   **High Achievers:** There are some standout performers (identified later) demonstrating that success is possible within this structure. This indicates that the BM has the potential to foster a high-performing environment.
*   **Weaknesses:**
    *   **Inconsistency:** A significant variation in performance across RMs and Partners. 

In [121]:
merged_df.head()

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center_x,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name,Doer Type,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,Total_Performance
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700.0,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,17.5075
1,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650.0,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,41.0625
2,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,27000,0.0,0.0,0.0,0.0,2023,3,24695,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,4.6875
3,4,24040,GOURAB PURKAIT,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,26998.7,1.8,26998.7,0,0,0,0,0,0,0,0,0,0.0,33750,10499.49,31.11,10499.49,0.0,2023,4,24040,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,26998.7,1.8,,,,33750,10499.49,31.11,20,3.0,15.0,3.11,0,0.0,6999.66,2.33,14.59,11.9775
4,5,25776,RAJ KISHORE BARIK,24 SOUTH PARGANA,NON D,SUBRATA MAITY,600000,496499.92,82.75,1499.92,0,0,0,0,0,0,0,0,0,495000.0,13500,1499.92,11.11,1499.92,0.0,2023,5,25776,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,600000,496499.92,82.75,,,,13500,1499.92,11.11,8,3.0,37.5,0.0,0,,0.0,0.0,40.82,32.84


In [76]:
rowsss = merged_df[merged_df['Partner Name'] == 'VA Financial Products Distribution LLP']

In [77]:
rowsss

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center_x,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Broker Code,Doer Name,Doer Type,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,Total_Performance
46560,23439,1461,VA Financial Products Distribution LLP,Surat (ALT CHANNEL),NON D,VA Financial Products Distribution LLP,13608000,2731750202.19,20074.59,1639518330.61,-3149360.55,-1483279.61,0,0,1244500,0,0,331245797.75,0,753660657.92,226800,26413006.39,11645.95,43983185.69,17570179.3,2024,2024,VA Financial Products Distribution LLP,Agency Manager,Surat (ALT CHANNEL),13608000,2731750202.19,20074.59,24208768.66,37281966.15,154.0,226800,26413006.39,11645.95,48,925,1927.08,27162774.44,1442000,5.31,87078635.02,23036.68,7836.8,8413.2325
79784,30952,1461,VA Financial Products Distribution LLP,SURAT - VESU,NON D,VA Financial Products Distribution LLP,6032880,3269519068.1,54195.0,1778494517.64,2476858.45,76555121.12,0,0,742000,0,0,127494722.75,122970636,1160785212.14,144020,15419066.05,10706.2,39401031.5,23981965.45,2025,2025,VA Financial Products Distribution LLP,Agency Manager,SURAT - VESU,6032880,3269519068.1,54195.0,28162590.59,47158512.7,167.45,144020,15419066.05,10706.2,51,1114,2184.31,41418053.19,13021389,31.44,110284108.44,15195.67,14423.51,16779.2375


### Consolidated

In [92]:
import pandas as pd
import concurrent.futures
import google.generativeai as genai

class PerformanceInsightGenerator:
    """
    Generates deep descriptive and prescriptive insights:
    a) Consolidated Score Level (for Employees/Partners)
    b) Parameter-Wise Scores
    """

    def __init__(self, api_key):
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel("gemini-2.0-flash")

        self.kpi_percent_cols = [
            'Equity Net Sales_% Achievement',
            'SIP Sales_% Achievement',
            'Insurance_% Achievement',
            'Client Acquisition_% Achievement',
            'LAS_% Achievement'
        ]
        self.kpi_numeric_cols = [
            'Net Sales through MARS',
            'Net NJ PMS Sales',
            'SIP to Net Sales Ratio_Ratio'
        ]
        self.parameter_kpis = [
            'Equity Net Sales_% Achievement',
            'SIP Sales_% Achievement',
            'Client Acquisition_% Achievement',
            'Net Sales through MARS',
            'LAS_% Achievement'
        ]
        self.all_kpis = self.kpi_percent_cols + self.kpi_numeric_cols
 
    def normalize_kpis(self, df):
        df_norm = df.copy()
        for col in self.all_kpis:
            max_val = df_norm[col].max()
            df_norm[col + '_Normalized'] = (
                df_norm[col] / max_val * 100 if max_val != 0 else 0
            )
        return df_norm

    def compute_consolidated_score(self, df):
        norm_cols = [col + '_Normalized' for col in self.all_kpis]
        df_scores = df.groupby('Partner Name')[norm_cols].mean().reset_index()
        df_scores['Consolidated Score'] = df_scores[norm_cols].mean(axis=1)
        return df_scores

    def get_top_partners(self, df_scores, top_n=5):
        return df_scores.sort_values(by='Consolidated Score', ascending=False).head(top_n)

    def generate_consolidated_insights(self, df_top):
        def generate_for_partner(row):
            partner = row['Partner Name']
            cons_score = round(row['Consolidated Score'], 2)
            kpi_scores = {kpi: round(row[kpi + '_Normalized'], 2) for kpi in self.all_kpis}

            prompt = f"""
You are an expert business analyst. Analyze the performance of partner '{partner}'.
Consolidated Score: {cons_score}%
Individual KPI Scores: {kpi_scores}

Generate concise yet insightful text:
1. Descriptive Insights: How the partner is performing overall and key strengths/weaknesses.
2. Prescriptive Insights: Actions or focus areas to improve performance.
            """
            try:
                resp = self.model.generate_content(
                    prompt,
                    generation_config={
                        "temperature": 0.7,
                        "top_p": 0.9,
                        "top_k": 40,
                        "max_output_tokens": 1024
                    }
                )
                return f"\nPartner: {partner}\n{resp.text}\n{'-'*90}\n"
            except Exception as e:
                return f"Error generating insights for {partner}: {e}\n"

        with concurrent.futures.ThreadPoolExecutor() as executor:
            results = list(executor.map(generate_for_partner, [row for _, row in df_top.iterrows()]))
        return "\n".join(results)


    def generate_parameter_insights(self, df, top_n=5):
        df['Total_Performance'] = df[self.parameter_kpis].mean(axis=1)
        top_partners = df.sort_values(by='Total_Performance', ascending=False).head(top_n)

        insights_text = ""
        for _, row in top_partners.iterrows():
            partner = row['Partner Name']
            insights_text += f"\nPartner: {partner}\n"

            for kpi in self.parameter_kpis:
                value = round(row[kpi], 2)
                prompt = f"""
You are a performance analyst. Analyze the KPI '{kpi}' for partner '{partner}'.
KPI Value: {value}%

Generate short insights with clear structure:
1. Descriptive Insights: How the partner is performing on this KPI and what stands out.
2. Prescriptive Insights: Key actions or focus points to enhance this KPI.
"""
                try:
                    resp = self.model.generate_content(
                        prompt,
                        generation_config={
                            "temperature": 0.7,
                            "top_p": 0.9,
                            "top_k": 40,
                            "max_output_tokens": 1024
                        }
                    )
                    insights_text += f"{kpi} Insights:\n{resp.text}\n{'-'*60}\n"
                except Exception as e:
                    insights_text += f"Error generating insights for {partner} - {kpi}: {e}\n"

        return insights_text

    def run(self, merged_df, top_n=5):
        df_norm = self.normalize_kpis(merged_df)
        df_scores = self.compute_consolidated_score(df_norm)
        df_top = self.get_top_partners(df_scores, top_n=top_n)

        print("\n=== CONSOLIDATED SCORE LEVEL INSIGHTS ===")
        consolidated_output = self.generate_consolidated_insights(df_top)
        print(consolidated_output)

        print("\n=== PARAMETER-WISE KPI INSIGHTS ===")
        parameter_output = self.generate_parameter_insights(merged_df, top_n=top_n)
        print(parameter_output)

        return consolidated_output, parameter_output

# usage
import pandas as pd
import concurrent.futures
import google.generativeai as genai

class PerformanceInsightGenerator:
    """
    Generates deep descriptive and prescriptive insights:
    a) Consolidated Score Level (for Employees/Partners)
    b) Parameter-Wise Scores
    """

    def __init__(self, api_key):
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel("gemini-2.0-flash")

        # KPI definitions
        self.kpi_percent_cols = [
            'Equity Net Sales_% Achievement',
            'SIP Sales_% Achievement',
            'Insurance_% Achievement',
            'Client Acquisition_% Achievement',
            'LAS_% Achievement'
        ]
        self.kpi_numeric_cols = [
            'Net Sales through MARS',
            'Net NJ PMS Sales',
            'SIP to Net Sales Ratio_Ratio'
        ]
        self.parameter_kpis = [
            'Equity Net Sales_% Achievement',
            'SIP Sales_% Achievement',
            'Client Acquisition_% Achievement',
            'Net Sales through MARS',
            'LAS_% Achievement'
        ]
        self.all_kpis = self.kpi_percent_cols + self.kpi_numeric_cols

    def normalize_kpis(self, df):
        df_norm = df.copy()
        for col in self.all_kpis:
            max_val = df_norm[col].max()
            df_norm[col + '_Normalized'] = (
                df_norm[col] / max_val * 100 if max_val != 0 else 0
            )
        return df_norm

    def compute_consolidated_score(self, df):
        norm_cols = [col + '_Normalized' for col in self.all_kpis]
        df_scores = df.groupby('Partner Name')[norm_cols].mean().reset_index()
        df_scores['Consolidated Score'] = df_scores[norm_cols].mean(axis=1)
        return df_scores

    def get_top_partners(self, df_scores, top_n=5):
        return df_scores.sort_values(by='Consolidated Score', ascending=False).head(top_n)

    def generate_consolidated_insights(self, df_top):
        def generate_for_partner(row):
            partner = row['Partner Name']
            cons_score = round(row['Consolidated Score'], 2)
            kpi_scores = {kpi: round(row[kpi + '_Normalized'], 2) for kpi in self.all_kpis}

            prompt = f"""
You are an expert business analyst. Analyze the performance of partner '{partner}'.
Consolidated Score: {cons_score}%
Individual KPI Scores: {kpi_scores}

Generate concise yet insightful text:
1. Descriptive Insights: How the partner is performing overall and key strengths/weaknesses.
2. Prescriptive Insights: Actions or focus areas to improve performance.
            """
            try:
                resp = self.model.generate_content(
                    prompt,
                    generation_config={
                        "temperature": 0.7,
                        "top_p": 0.9,
                        "top_k": 40,
                        "max_output_tokens": 1024
                    }
                )
                return f"\nPartner: {partner}\n{resp.text}\n{'-'*90}\n"
            except Exception as e:
                return f"Error generating insights for {partner}: {e}\n"

        with concurrent.futures.ThreadPoolExecutor() as executor:
            results = list(executor.map(generate_for_partner, [row for _, row in df_top.iterrows()]))
        return "\n".join(results)

    # ---------------------------------------------------
    # (b) Parameter-Wise KPI Insights
    # ---------------------------------------------------
    def generate_parameter_insights(self, df, top_n=5):
        df['Total_Performance'] = df[self.parameter_kpis].mean(axis=1)
        top_partners = df.sort_values(by='Total_Performance', ascending=False).head(top_n)

        insights_text = ""
        for _, row in top_partners.iterrows():
            partner = row['Partner Name']
            insights_text += f"\nPartner: {partner}\n"

            for kpi in self.parameter_kpis:
                value = round(row[kpi], 2)
                prompt = f"""
You are a performance analyst. Analyze the KPI '{kpi}' for partner '{partner}'.
KPI Value: {value}%

Generate short insights with clear structure:
1. Descriptive Insights: How the partner is performing on this KPI and what stands out.
2. Prescriptive Insights: Key actions or focus points to enhance this KPI.
"""
                try:
                    resp = self.model.generate_content(
                        prompt,
                        generation_config={
                            "temperature": 0.7,
                            "top_p": 0.9,
                            "top_k": 40,
                            "max_output_tokens": 1024
                        }
                    )
                    insights_text += f"{kpi} Insights:\n{resp.text}\n{'-'*60}\n"
                except Exception as e:
                    insights_text += f"Error generating insights for {partner} - {kpi}: {e}\n"

        return insights_text

    def run(self, merged_df, top_n=5):
        df_norm = self.normalize_kpis(merged_df)
        df_scores = self.compute_consolidated_score(df_norm)
        df_top = self.get_top_partners(df_scores, top_n=top_n)

        print("\n=== CONSOLIDATED SCORE LEVEL INSIGHTS ===")
        consolidated_output = self.generate_consolidated_insights(df_top)
        print(consolidated_output)

        print("\n=== PARAMETER-WISE KPI INSIGHTS ===")
        parameter_output = self.generate_parameter_insights(merged_df, top_n=top_n)
        print(parameter_output)

        return consolidated_output, parameter_output


In [93]:
generator = PerformanceInsightGenerator(api_key=google_api_key)
consolidated_output, parameter_output = generator.run(merged_df, top_n=5)

print("\n--- Consolidated Score Insights ---")
print(consolidated_output)

print("\n--- Parameter-wise KPI Insights ---")
print(parameter_output)



=== CONSOLIDATED SCORE LEVEL INSIGHTS ===

Partner: VA Financial Products Distribution LLP
Here's an analysis of VA Financial Products Distribution LLP's performance:

**1. Descriptive Insights:**

VA Financial Products Distribution LLP demonstrates a **below-average consolidated score of 54.7%**.  Their strengths lie in **SIP Sales (80.07% achievement), Net Sales through MARS (82.46%), SIP to Net Sales Ratio (82.98%) and NJ PMS Sales (69.24%)**, indicating a solid performance in these areas. However, significant weaknesses exist in **Insurance (0.18% achievement) and LAS (9.19% achievement)**. **Client Acquisition (44.92% achievement)** also lags behind, and **Equity Net Sales (68.52% achievement)** could be improved. This suggests a potential over-reliance on SIPs and MARS, with underperformance in other key product lines and client growth.

**2. Prescriptive Insights:**

*   **Address Insurance and LAS Performance:** Immediately investigate the reasons for near-zero achievement in 

In [120]:
df2 = merged_df.copy()

In [121]:
df2.head()

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Broker Code,Doer Name,Doer Type,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,Net Sales through MARS_Normalized,Total_Performance
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,23676,SUBRATA MAITY,Fundz Express,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,0.002559,5954.006
1,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,20361,SUBRATA MAITY,Fundz Express,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,0.12523,290762.85
2,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023,24695,SUBRATA MAITY,Fundz Express,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,0.0,4.6875
3,4,24040,GOURAB PURKAIT,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,26998.7,1.8,26998.7,0,0,0,0,0,0,0,0,0,0,33750,10499.49,31.11,10499.49,0.0,2023,24040,SUBRATA MAITY,Fundz Express,1500000,26998.7,1.8,,,,33750,10499.49,31.11,20,3.0,15.0,3.11,0,0.0,6999.66,2.33,14.59,0.0,9.582
4,5,25776,RAJ KISHORE BARIK,24 SOUTH PARGANA,NON D,SUBRATA MAITY,600000,496499.92,82.75,1499.92,0,0,0,0,0,0,0,0,0,495000,13500,1499.92,11.11,1499.92,0.0,2023,25776,SUBRATA MAITY,Fundz Express,600000,496499.92,82.75,,,,13500,1499.92,11.11,8,3.0,37.5,0.0,0,,0.0,0.0,40.82,0.042644,123782.84


In [240]:
#PARTNER CONCENTRATION
import pandas as pd
import google.generativeai as genai

# ================= GEMINI SETUP =================
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    """Uses Gemini LLM to generate descriptive & prescriptive insights dynamically."""
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str) -> str:
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            return f"Error generating insight: {e}"


# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]

thresholds = [25, 50, 75, 80, 90]


# ================= BASE FUNCTION =================
def partner_concentration(df, kpi, thresholds=thresholds):
    df_sorted = (
        df.groupby('Partner Name')[kpi]
        .sum()
        .sort_values(ascending=False)
        .reset_index()
    )

    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100

    total_partners = len(df_sorted)
    conc_summary = {}

    for t in thresholds:
        # Find how many partners are needed to reach or exceed t% of total KPI
        num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
        conc_summary[f'Partners for {t}%'] = num_partners
        conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)

    return conc_summary, total_partners, df_sorted

# ================= BRANCH MANAGER CONCENTRATION (TOP 5 BMs) =================
def top_branch_manager_concentration(df, kpi, top_bms=5, top_partners=2, thresholds=thresholds):
    bm_insights = {}

    # Find top 5 BMs by total KPI
    top_bms_list = (
        df.groupby('BM')[kpi]
        .sum()
        .sort_values(ascending=False)
        .head(top_bms)
        .index
        .tolist()
    )

    for bm in top_bms_list:
        group = df[df['BM'] == bm]
        conc_summary, total_partners, df_sorted = partner_concentration(group, kpi, thresholds)
        
        # === Create the readable concentration text (like your example) ===
        bm_text = f"Branch Manager: {bm}\nKPI: {kpi}\n"
        bm_text += f"Total Partners under {bm}: {total_partners}\n\n"
        bm_text += f"Partner Concentration Insight for KPI '{kpi}':\n"
        for t in thresholds:
            bm_text += (
                f" - Top {conc_summary[f'Partners for {t}%']} partners "
                f"({conc_summary[f'% of Total Partners for {t}%']}% of total partners) "
                f"contribute {t}% of total {kpi}.\n"
            )

        # === Show top 2 partners for context ===
        top2 = df_sorted.head(top_partners)[['Partner Name', kpi, 'Cumulative KPI %']]
        bm_text += "\nTop 2 Partners (for reference):\n"
        for _, row in top2.iterrows():
            bm_text += f" - {row['Partner Name']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}% cumulative)\n"

        bm_insights[bm] = bm_text

    return bm_insights


# ================= GEMINI INSIGHTS =================
def generate_branch_manager_deep_insights(df, kpi_list, generator, top_bms=5, top_partners=2):
    bm_all_insights = {}

    for kpi in kpi_list:
        bm_summaries = top_branch_manager_concentration(df, kpi, top_bms, top_partners)

        for bm, text in bm_summaries.items():
            prompt = f"""
            You are a financial performance analyst.
            Below is data for Branch Manager '{bm}' on KPI '{kpi}' showing partner concentration.
            Analyze how concentrated performance is (few vs many partners contributing).
            Write 3–4 crisp insights about:
            1. Level of concentration (high/medium/low),
            2. Implications on business dependency,
            3. Partner development or risk recommendations.

            Data:
            {text}
            """
            insight = generator.generate_insight(prompt)
            bm_all_insights.setdefault(bm, {})[kpi] = text + "\n\nLLM Insights:\n" + insight

    return bm_all_insights


# ================= RUN =================
generator = DeepInsightGenerator()

branch_manager_insights = generate_branch_manager_deep_insights(
    merged_with_hierarchyy, kpi_list, generator, top_bms=5, top_partners=2
)

# ================= DISPLAY =================
for bm, kpis in branch_manager_insights.items():
    print(f"\n######## Branch Manager: {bm} ########")
    for kpi, insight in kpis.items():
        print(f"\n=== {kpi} ===")
        print(insight)
        print("-" * 80)
        




#### Leaders & laggers

import pandas as pd
import google.generativeai as genai

# ================= GEMINI SETUP =================
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    """Uses Gemini LLM to generate descriptive & prescriptive insights dynamically."""
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str) -> str:
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            return f"Error generating insight: {e}"


# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]


# ================= LEADERS & LAGGERS =================
def branch_manager_leaders_laggers(df, kpi, top_bms=5, top_n=3):
    """
    For each of the top BMs (by KPI total), find top and bottom partners
    and quantify their share of performance.
    """
    bm_results = {}

    # Top 5 BMs overall for that KPI
    top_bms_list = (
        df.groupby('BM')[kpi]
        .sum()
        .sort_values(ascending=False)
        .head(top_bms)
        .index
        .tolist()
    )

    for bm in top_bms_list:
        bm_df = df[df['BM'] == bm]

        # Sort partners under that BM
        perf = (
            bm_df.groupby('Partner Name')[kpi]
            .sum()
            .sort_values(ascending=False)
            .reset_index()
        )

        total_perf = perf[kpi].sum()

        # Leaders (Top N)
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi] / total_perf * 100, 2)

        # Laggers (Bottom N)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi] / total_perf * 100, 2)

        # Performance summary
        leaders_contrib = leaders['% Share'].sum()
        laggers_contrib = laggers['% Share'].sum()

        summary_text = f"Branch Manager: {bm}\nKPI: {kpi}\n\n"
        summary_text += f"Total Partners under {bm}: {len(perf)}\n"
        summary_text += f"Top {top_n} Leaders contribute: {leaders_contrib:.2f}% of total {kpi}\n"
        summary_text += f"Bottom {top_n} Laggers contribute: {laggers_contrib:.2f}% of total {kpi}\n\n"

        summary_text += "Top Performing (Leader) Partners:\n"
        for _, row in leaders.iterrows():
            summary_text += f" - {row['Partner Name']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"

        summary_text += "\nLow Performing (Lagger) Partners:\n"
        for _, row in laggers.iterrows():
            summary_text += f" - {row['Partner Name']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"

        bm_results[bm] = summary_text

    return bm_results


# ================= GEMINI INSIGHT GENERATION =================
def generate_leaders_laggers_insights(df, kpi_list, generator, top_bms=5, top_n=3):
    all_bm_insights = {}

    for kpi in kpi_list:
        bm_summaries = branch_manager_leaders_laggers(df, kpi, top_bms, top_n)

        for bm, text in bm_summaries.items():
            prompt = f"""
            You are a financial performance analyst.
            Below is data for Branch Manager '{bm}' on KPI '{kpi}' showing top and bottom performing partners.

            Write a professional 3–4 bullet point insight covering:
            1. Performance dependency (e.g., dominated by few top partners or evenly spread),
            2. Impact of leaders on total performance,
            3. Weak link from laggers and improvement recommendations,
            4. Any early warning or strategic focus points.

            Data:
            {text}
            """
            insight = generator.generate_insight(prompt)
            all_bm_insights.setdefault(bm, {})[kpi] = text + "\n\nLLM Insights:\n" + insight

    return all_bm_insights


# ================= EXECUTION =================
generator = DeepInsightGenerator()

leaders_laggers_insights = generate_leaders_laggers_insights(
    merged_with_hierarchyy, kpi_list, generator, top_bms=5, top_n=3
)


# ================= DISPLAY =================
for bm, kpis in leaders_laggers_insights.items():
    print(f"\n######## Branch Manager: {bm} ########")
    for kpi, insight in kpis.items():
        print(f"\n=== {kpi} ===")
        print(insight)
        print("-" * 80)



##### sudden change
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================ GEMINI SETUP ================
gemini_api_key = google_api_key  # Replace with your valid Gemini key
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

# ================ CONFIG ================
kpi_cols = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
threshold_pct_change = 50   # % threshold for major YoY change
year_col = 'FY_Year'
partner_col = 'Partner Name'
bm_col = 'BM'
top_bm = 2                  # Top 2 Branch Managers
top_partners = 2            # Top 2 Partners per BM


# ================ STEP 1: DETECT DRASTIC CHANGES ================
def detect_drastic_changes(df):
    """Detect YoY KPI changes (above threshold) per partner per BM."""
    df_sorted = df.sort_values([bm_col, partner_col, year_col])
    change_records = []

    for (bm, partner), group in df_sorted.groupby([bm_col, partner_col]):
        group = group.sort_values(year_col)
        for i in range(1, len(group)):
            prev, curr = group.iloc[i - 1], group.iloc[i]

            for kpi in kpi_cols:
                prev_val, curr_val = prev[kpi], curr[kpi]
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val == 0:
                    continue
                pct_change = ((curr_val - prev_val) / abs(prev_val)) * 100

                if abs(pct_change) >= threshold_pct_change:
                    change_records.append({
                        'BM': bm,
                        'Partner Name': partner,
                        'KPI': kpi,
                        'Year From': prev[year_col],
                        'Year To': curr[year_col],
                        'Previous Value': round(prev_val, 2),
                        'Current Value': round(curr_val, 2),
                        '% Change': round(pct_change, 2)
                    })

    return pd.DataFrame(change_records)


# ================ STEP 2: SELECT TOP 2 BMs & TOP 2 PARTNERS ================
def get_top_bm_partners(change_df):
    """Get top 2 BMs and within each, top 2 partners with max change magnitude."""
    if change_df.empty:
        return pd.DataFrame()

    # Rank BMs by total magnitude of change
    top_bms = (
        change_df.groupby('BM')['% Change']
        .apply(lambda x: x.abs().mean())
        .sort_values(ascending=False)
        .head(top_bm)
        .index
    )

    top_df = change_df[change_df['BM'].isin(top_bms)]
    final_records = []

    # Within each BM, take top 2 partners
    for bm in top_bms:
        bm_data = top_df[top_df['BM'] == bm]
        top_partners_for_bm = (
            bm_data.groupby('Partner Name')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        final_records.append(bm_data[bm_data['Partner Name'].isin(top_partners_for_bm)])

    return pd.concat(final_records)


# ================ STEP 3: GENERATE LLM INSIGHTS ================
def generate_bm_insights(change_df):
    """Generate insights for each BM and its top 2 partners."""
    bm_insights = {}

    if change_df.empty:
        return {"All": "No significant performance changes detected above threshold."}

    for bm, group in change_df.groupby('BM'):
        combined_text = f"\n📊 Branch Manager: {bm}\n"

        # Top 2 partners for this BM
        top_partner_names = (
            group.groupby('Partner Name')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )

        for partner in top_partner_names:
            partner_data = group[group['Partner Name'] == partner]

            prompt = f"""
You are a senior financial analyst.
Analyze the KPI performance changes for partner '{partner}' under Branch Manager '{bm}'.

Below is year-over-year data showing KPI shifts beyond ±{threshold_pct_change}%:
{partner_data}

(3–4 bullet points) that :
1. Identifies which KPIs show the largest positive or negative shifts, with actual % changes.
2. Interprets what these changes reveal about the partner’s business direction.
3. Explains how these shifts impact the Branch Manager’s overall portfolio balance.
4. Provides 1–2 quantified, strategic recommendations for the BM — such as stabilizing declines or scaling strong areas.

Tone: Data-driven, concise, and suitable for a sales performance dashboard.
Use ↑ and ↓ symbols for directionality.
Avoid generic phrasing; use the numbers provided.
"""
            try:
                resp = model.generate_content(
                    prompt,
                    generation_config={
                        "temperature": 0.6,
                        "top_p": 0.9,
                        "max_output_tokens": 800
                    }
                )
                combined_text += f"\n🔹 Partner: {partner}\n"
                combined_text += resp.text.strip() + "\n" + ("-" * 100) + "\n"
            except Exception as e:
                combined_text += f"⚠️ Error generating insight for {partner}: {e}\n"

        bm_insights[bm] = combined_text

    return bm_insights


# ================ STEP 4: EXECUTION ================
change_df = detect_drastic_changes(merged_with_hierarchyy)
filtered_df = get_top_bm_partners(change_df)
bm_partner_insights = generate_bm_insights(filtered_df)

# ================ STEP 5: DISPLAY OUTPUT ================
print("\n================= DRASTIC CHANGE SUMMARY (Filtered) =================\n")
print(filtered_df.head(10))

print("\n================= BRANCH MANAGER INSIGHTS =================\n")
for bm, insight in bm_partner_insights.items():
    print(insight)
    print("=" * 120)


### AREA TO FOCUS 
import pandas as pd
import numpy as np
import re
import google.generativeai as genai
from sqlalchemy import create_engine

# ------------------ GEMINI CONFIG ------------------
gemini_api_key = google_api_key  
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

# ------------------ MYSQL CONNECTION ------------------
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# ------------------ COLUMNS CONFIG ------------------
kpi_cols = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
partner_col = 'Partner Name'
bm_col = 'BM'
target_col_suffix = 'Target'
top_bm = 3
top_partners = 2


# ================ STEP 1: Identify Performance Gaps ================
def identify_focus_areas(df):
    """Calculate % gap vs target or overall average per partner per BM."""
    focus_records = []
    kpi_avg = df[kpi_cols].mean()

    for (bm, partner), group in df.groupby([bm_col, partner_col]):
        partner_row = group.iloc[-1]
        gaps = {}

        for kpi in kpi_cols:
            possible_targets = [col for col in df.columns if target_col_suffix in col and kpi in col]
            target_value = partner_row[possible_targets[0]] if possible_targets else kpi_avg[kpi]

            if target_value and not pd.isna(target_value):
                gap_pct = ((target_value - partner_row[kpi]) / target_value) * 100
            else:
                gap_pct = 0

            gaps[kpi] = round(gap_pct, 2)

        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x: x[1], reverse=True)

        focus_records.append({
            'BM': bm,
            'Partner Name': partner,
            'Focus KPIs': sorted_gaps,
            'Max Gap': max_gap
        })

    return pd.DataFrame(focus_records)


# ================ STEP 2: Filter Top 3 BMs & Top 2 Partners ================
def get_top_bm_partners(focus_df):
    """Select top 3 BMs and within each, top 2 partners by max gap."""
    bm_rank = (
        focus_df.groupby('BM')['Max Gap']
        .max()
        .sort_values(ascending=False)
        .head(top_bm)
        .index
    )
    filtered = focus_df[focus_df['BM'].isin(bm_rank)]

    top_data = (
        filtered.groupby('BM', group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap', ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    return top_data


# ================ STEP 3: Generate Insights (LLM) ================
def generate_partner_focus_insights(focus_df):
    """Generate LLM-based recommendations per BM and Partner."""
    bm_insights = {}

    for bm, group in focus_df.groupby('BM'):
        bm_insights[bm] = {}

        for _, row in group.iterrows():
            partner = row['Partner Name']
            focus_kpis = row['Focus KPIs']

            prompt = f"""
You are a senior business analyst.
Analyze the performance gaps for partner '{partner}' under Branch Manager '{bm}'.

Below are the KPIs and their % gaps (higher = more underperformance):
{focus_kpis}

Your task:
- Focus only on quantitative insights — do not generate generic text.
- Use the % gaps to suggest realistic improvement targets (X%) based on the data.
- Estimate how much total branch performance (Y%) could improve if this partner closes these gaps partially or fully.

Generate exactly 3–4 concise insights:
1. Identify the top 2–3 KPIs with highest % gaps.
2. Suggest realistic % improvement targets (X%) and expected partner-level performance gains.
3. Estimate branch-level improvement (Y%) if achieved.
4. End with one actionable recommendation for the Branch Manager.
"""
            try:
                resp = model.generate_content(
                    prompt,
                    generation_config={
                        "temperature": 0.7,
                        "top_p": 0.9,
                        "top_k": 40,
                        "max_output_tokens": 800
                    }
                )
                bm_insights[bm][partner] = resp.text.strip()
            except Exception as e:
                bm_insights[bm][partner] = f"Error generating insight: {e}"

    return bm_insights


# ================ STEP 4: Extract Partner Name (Helper) ================
def extract_partner_name(text):
    match = re.search(r"Partner\s*:\s*([A-Za-z\s\.\-']+)", text)
    if match:
        return match.group(1).strip()
    return "Unknown Partner"


# ================ STEP 5: Save Insights to MySQL ================
def save_insights_to_mysql(insight_dict, insight_type="Focus Insight"):
    records = []

    for bm, nested_dict in insight_dict.items():
        for partner, insight_text in nested_dict.items():
            partner_name = extract_partner_name(insight_text) if partner == "Unknown Partner" else partner
            records.append({
                "BM": bm,
                "Partner_Name": partner_name,
                "Insight_Type": insight_type,
                "Insight": insight_text
            })

    df_to_save = pd.DataFrame(records)
    df_to_save.to_sql(name="AllInsights", con=engine, if_exists="append", index=False)
    print(f" Saved {len(df_to_save)} insights successfully to MySQL.")


# ================ STEP 6: EXECUTION ================
focus_df = identify_focus_areas(merged_with_hierarchyy)
filtered_df = get_top_bm_partners(focus_df)
bm_focus_insights = generate_partner_focus_insights(filtered_df)

print("\n================= BRANCH MANAGER FOCUS INSIGHTS =================\n")
for bm, partners in bm_focus_insights.items():
    print(f"\n######## Branch Manager: {bm} ########\n")
    for partner, insight in partners.items():
        print(f"\nPartner: {partner}\n")
        print(insight)
        print("-" * 100)
    print("=" * 120)

# Save all insights into database
save_insights_to_mysql(bm_focus_insights, insight_type="Area to Focus")



KeyError: 'Partner Name'

In [242]:
import pandas as pd
import numpy as np
import google.generativeai as genai
from typing import List, Dict, Tuple, Optional
import logging
from dataclasses import dataclass

# ================= CONFIGURATION =================
@dataclass
class InsightConfig:
    """Configuration for insight generation"""
    kpi_list: List[str] = None
    hierarchy_levels: List[str] = None
    thresholds: List[int] = None
    top_managers: int = 5
    top_partners: int = 3
    change_threshold: float = 50.0
    
    def __post_init__(self):
        if self.kpi_list is None:
            self.kpi_list = [
                'Equity Sales',
                'SIP Sales Achievement', 
                'Net Sales through MARS',
                'Investment Net Sales Achievement'
            ]
        if self.hierarchy_levels is None:
            self.hierarchy_levels = ['ZM', 'BM', 'RM']
        if self.thresholds is None:
            self.thresholds = [25, 50, 75, 80, 90]

# ================= GEMINI SERVICE =================
class GeminiService:
    """Handles all Gemini LLM interactions"""
    def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash"):
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)
        self.logger = logging.getLogger(__name__)
    
    def generate_insight(self, prompt: str, temperature: float = 0.7) -> str:
        """Generate insight using Gemini model"""
        try:
            response = self.model.generate_content(
                prompt,
                generation_config={
                    "temperature": temperature,
                    "top_p": 0.9,
                    "max_output_tokens": 800
                }
            )
            return response.text.strip()
        except Exception as e:
            self.logger.error(f"Gemini API error: {e}")
            return f"Error generating insight: {e}"

# ================= BASE INSIGHT GENERATOR =================
class InsightGenerator:
    """Base class for all insight generators"""
    def __init__(self, gemini_service: GeminiService, config: InsightConfig):
        self.gemini = gemini_service
        self.config = config
        self.logger = logging.getLogger(__name__)
    
    def get_top_managers(self, df: pd.DataFrame, kpi: str, hierarchy_level: str) -> List[str]:
        """Get top managers by KPI for given hierarchy level"""
        return (
            df.groupby(hierarchy_level)[kpi]
            .sum()
            .sort_values(ascending=False)
            .head(self.config.top_managers)
            .index
            .tolist()
        )

# ================= PARTNER CONCENTRATION =================
class ConcentrationInsightGenerator(InsightGenerator):
    """Generates partner concentration insights"""
    
    def calculate_concentration(self, df: pd.DataFrame, kpi: str) -> Tuple[Dict, int, pd.DataFrame]:
        """Calculate concentration metrics for a group"""
        df_sorted = (
            df.groupby('Partner Name')[kpi]
            .sum()
            .sort_values(ascending=False)
            .reset_index()
        )

        total_kpi = df_sorted[kpi].sum()
        if total_kpi == 0:
            return {}, 0, df_sorted
            
        df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
        df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100

        total_partners = len(df_sorted)
        conc_summary = {}

        for threshold in self.config.thresholds:
            try:
                num_partners = (df_sorted['Cumulative KPI %'] >= threshold).idxmax() + 1
                conc_summary[f'Partners for {threshold}%'] = num_partners
                conc_summary[f'% of Total Partners for {threshold}%'] = round(
                    num_partners / total_partners * 100, 2
                )
            except (ValueError, IndexError):
                conc_summary[f'Partners for {threshold}%'] = total_partners
                conc_summary[f'% of Total Partners for {threshold}%'] = 100.0

        return conc_summary, total_partners, df_sorted
    
    def generate_manager_concentration(self, df: pd.DataFrame, kpi: str, hierarchy_level: str) -> Dict:
        """Generate concentration insights for all managers at given hierarchy level"""
        manager_insights = {}
        top_managers = self.get_top_managers(df, kpi, hierarchy_level)
        
        for manager in top_managers:
            manager_data = df[df[hierarchy_level] == manager]
            conc_summary, total_partners, df_sorted = self.calculate_concentration(manager_data, kpi)
            
            if not conc_summary:
                continue
                
            # Create concentration text
            insight_text = self._format_concentration_text(
                manager, kpi, hierarchy_level, total_partners, conc_summary, df_sorted
            )
            
            # Generate LLM insights
            llm_insight = self._generate_concentration_llm_insight(
                manager, kpi, hierarchy_level, insight_text
            )
            
            manager_insights[manager] = insight_text + "\n\nLLM Insights:\n" + llm_insight
            
        return manager_insights
    
    def _format_concentration_text(self, manager: str, kpi: str, level: str, 
                                 total_partners: int, conc_summary: Dict, df_sorted: pd.DataFrame) -> str:
        """Format concentration analysis into readable text"""
        text = f"{level}: {manager}\nKPI: {kpi}\n"
        text += f"Total Partners under {manager}: {total_partners}\n\n"
        text += f"Partner Concentration Insight for KPI '{kpi}':\n"
        
        for threshold in self.config.thresholds:
            partners_key = f'Partners for {threshold}%'
            percent_key = f'% of Total Partners for {threshold}%'
            
            if partners_key in conc_summary:
                text += (
                    f" - Top {conc_summary[partners_key]} partners "
                    f"({conc_summary[percent_key]}% of total partners) "
                    f"contribute {threshold}% of total {kpi}.\n"
                )
        
        # Add top partners for context
        top_partners_data = df_sorted.head(self.config.top_partners)[['Partner Name', kpi, 'Cumulative KPI %']]
        text += f"\nTop {self.config.top_partners} Partners (for reference):\n"
        for _, row in top_partners_data.iterrows():
            text += f" - {row['Partner Name']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}% cumulative)\n"
            
        return text
    
    def _generate_concentration_llm_insight(self, manager: str, kpi: str, level: str, text: str) -> str:
        """Generate LLM insights for concentration analysis"""
        prompt = f"""
        You are a financial performance analyst.
        Below is data for {level} '{manager}' on KPI '{kpi}' showing partner concentration.
        Analyze how concentrated performance is (few vs many partners contributing).
        Write 3–4 crisp insights about:
        1. Level of concentration (high/medium/low),
        2. Implications on business dependency,
        3. Partner development or risk recommendations.

        Data:
        {text}
        """
        return self.gemini.generate_insight(prompt)

# ================= LEADERS & LAGGERS =================
class LeadersLaggersInsightGenerator(InsightGenerator):
    """Generates leaders and laggers insights"""
    
    def generate_manager_leaders_laggers(self, df: pd.DataFrame, kpi: str, hierarchy_level: str) -> Dict:
        """Generate leaders/laggers insights for all managers at given hierarchy level"""
        manager_insights = {}
        top_managers = self.get_top_managers(df, kpi, hierarchy_level)
        
        for manager in top_managers:
            manager_data = df[df[hierarchy_level] == manager]
            insight_text = self._analyze_leaders_laggers(manager, kpi, hierarchy_level, manager_data)
            
            # Generate LLM insights
            llm_insight = self._generate_leaders_laggers_llm_insight(
                manager, kpi, hierarchy_level, insight_text
            )
            
            manager_insights[manager] = insight_text + "\n\nLLM Insights:\n" + llm_insight
            
        return manager_insights
    
    def _analyze_leaders_laggers(self, manager: str, kpi: str, level: str, df: pd.DataFrame) -> str:
        """Analyze leaders and laggers for a manager"""
        # Sort partners by performance
        perf = (
            df.groupby('Partner Name')[kpi]
            .sum()
            .sort_values(ascending=False)
            .reset_index()
        )

        total_perf = perf[kpi].sum()
        if total_perf == 0:
            return f"{level}: {manager}\nKPI: {kpi}\n\nNo performance data available."

        # Leaders (Top N)
        leaders = perf.head(self.config.top_partners).copy()
        leaders['% Share'] = round(leaders[kpi] / total_perf * 100, 2)

        # Laggers (Bottom N)
        laggers = perf.tail(self.config.top_partners).copy()
        laggers['% Share'] = round(laggers[kpi] / total_perf * 100, 2)

        # Performance summary
        leaders_contrib = leaders['% Share'].sum()
        laggers_contrib = laggers['% Share'].sum()

        text = f"{level}: {manager}\nKPI: {kpi}\n\n"
        text += f"Total Partners under {manager}: {len(perf)}\n"
        text += f"Top {self.config.top_partners} Leaders contribute: {leaders_contrib:.2f}% of total {kpi}\n"
        text += f"Bottom {self.config.top_partners} Laggers contribute: {laggers_contrib:.2f}% of total {kpi}\n\n"

        text += "Top Performing (Leader) Partners:\n"
        for _, row in leaders.iterrows():
            text += f" - {row['Partner Name']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"

        text += "\nLow Performing (Lagger) Partners:\n"
        for _, row in laggers.iterrows():
            text += f" - {row['Partner Name']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"

        return text
    
    def _generate_leaders_laggers_llm_insight(self, manager: str, kpi: str, level: str, text: str) -> str:
        """Generate LLM insights for leaders/laggers analysis"""
        prompt = f"""
        You are a financial performance analyst.
        Below is data for {level} '{manager}' on KPI '{kpi}' showing top and bottom performing partners.

        Write a professional 3–4 bullet point insight covering:
        1. Performance dependency (e.g., dominated by few top partners or evenly spread),
        2. Impact of leaders on total performance,
        3. Weak link from laggers and improvement recommendations,
        4. Any early warning or strategic focus points.

        Data:
        {text}
        """
        return self.gemini.generate_insight(prompt)

# ================= DRASTIC CHANGES =================
class DrasticChangeInsightGenerator(InsightGenerator):
    """Generates insights for drastic performance changes"""
    
    def detect_drastic_changes(self, df: pd.DataFrame) -> pd.DataFrame:
        """Detect YoY KPI changes above threshold"""
        df_sorted = df.sort_values(['BM', 'Partner Name', 'FY_Year'])
        change_records = []

        for (bm, partner), group in df_sorted.groupby(['BM', 'Partner Name']):
            group = group.sort_values('FY_Year')
            for i in range(1, len(group)):
                prev, curr = group.iloc[i - 1], group.iloc[i]

                for kpi in self.config.kpi_list:
                    prev_val, curr_val = prev[kpi], curr[kpi]
                    if pd.isna(prev_val) or pd.isna(curr_val) or prev_val == 0:
                        continue
                    
                    pct_change = ((curr_val - prev_val) / abs(prev_val)) * 100

                    if abs(pct_change) >= self.config.change_threshold:
                        change_records.append({
                            'BM': bm,
                            'Partner Name': partner,
                            'KPI': kpi,
                            'Year From': prev['FY_Year'],
                            'Year To': curr['FY_Year'],
                            'Previous Value': round(prev_val, 2),
                            'Current Value': round(curr_val, 2),
                            '% Change': round(pct_change, 2)
                        })

        return pd.DataFrame(change_records)
    
    def generate_change_insights(self, df: pd.DataFrame) -> Dict:
        """Generate insights for drastic changes"""
        change_df = self.detect_drastic_changes(df)
        
        if change_df.empty:
            return {"All": "No significant performance changes detected above threshold."}

        # Get top managers with most changes
        top_bms = (
            change_df.groupby('BM')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(self.config.top_managers)
            .index
        )

        bm_insights = {}
        for bm in top_bms:
            bm_data = change_df[change_df['BM'] == bm]
            insight_text = self._generate_bm_change_insight(bm, bm_data)
            bm_insights[bm] = insight_text
            
        return bm_insights
    
    def _generate_bm_change_insight(self, bm: str, bm_data: pd.DataFrame) -> str:
        """Generate change insights for a specific BM"""
        text = f"\n📊 Branch Manager: {bm}\n"

        # Top partners for this BM
        top_partner_names = (
            bm_data.groupby('Partner Name')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(self.config.top_partners)
            .index
        )

        for partner in top_partner_names:
            partner_data = bm_data[bm_data['Partner Name'] == partner]

            prompt = f"""
You are a senior financial analyst.
Analyze the KPI performance changes for partner '{partner}' under Branch Manager '{bm}'.

Below is year-over-year data showing KPI shifts beyond ±{self.config.change_threshold}%:
{partner_data}

(3–4 bullet points) that:
1. Identifies which KPIs show the largest positive or negative shifts, with actual % changes.
2. Interprets what these changes reveal about the partner's business direction.
3. Explains how these shifts impact the Branch Manager's overall portfolio balance.
4. Provides 1–2 quantified, strategic recommendations for the BM.

Tone: Data-driven, concise, and suitable for a sales performance dashboard.
Use ↑ and ↓ symbols for directionality.
Avoid generic phrasing; use the numbers provided.
"""
            llm_insight = self.gemini.generate_insight(prompt, temperature=0.6)
            text += f"\n🔹 Partner: {partner}\n"
            text += llm_insight + "\n" + ("-" * 100) + "\n"

        return text

# ================= FOCUS AREAS =================
class FocusAreaInsightGenerator(InsightGenerator):
    """Generates focus area insights"""
    
    def identify_focus_areas(self, df: pd.DataFrame) -> pd.DataFrame:
        """Identify performance gaps vs targets or averages"""
        focus_records = []
        kpi_avg = df[self.config.kpi_list].mean()

        for (bm, partner), group in df.groupby(['BM', 'Partner Name']):
            partner_row = group.iloc[-1]  # Latest data
            gaps = {}

            for kpi in self.config.kpi_list:
                # Look for target columns
                possible_targets = [col for col in df.columns if 'Target' in col and kpi in col]
                target_value = partner_row[possible_targets[0]] if possible_targets else kpi_avg[kpi]

                if target_value and not pd.isna(target_value) and target_value != 0:
                    gap_pct = ((target_value - partner_row[kpi]) / target_value) * 100
                else:
                    gap_pct = 0

                gaps[kpi] = round(gap_pct, 2)

            max_gap = max(gaps.values()) if gaps else 0
            sorted_gaps = sorted(gaps.items(), key=lambda x: x[1], reverse=True)

            focus_records.append({
                'BM': bm,
                'Partner Name': partner,
                'Focus KPIs': sorted_gaps,
                'Max Gap': max_gap
            })

        return pd.DataFrame(focus_records)
    
    def generate_focus_insights(self, df: pd.DataFrame) -> Dict:
        """Generate focus area insights"""
        focus_df = self.identify_focus_areas(df)
        
        if focus_df.empty:
            return {"All": "No focus areas identified."}

        # Get top BMs with largest gaps
        top_bms = (
            focus_df.groupby('BM')['Max Gap']
            .max()
            .sort_values(ascending=False)
            .head(self.config.top_managers)
            .index
        )

        bm_insights = {}
        for bm in top_bms:
            bm_data = focus_df[focus_df['BM'] == bm]
            insight_text = self._generate_bm_focus_insight(bm, bm_data)
            bm_insights[bm] = insight_text
            
        return bm_insights
    
    def _generate_bm_focus_insight(self, bm: str, bm_data: pd.DataFrame) -> str:
        """Generate focus insights for a specific BM"""
        text = f"\n Branch Manager: {bm}\n"

        # Top partners with largest gaps
        top_partners = (
            bm_data.sort_values('Max Gap', ascending=False)
            .head(self.config.top_partners)
        )

        for _, row in top_partners.iterrows():
            partner = row['Partner Name']
            focus_kpis = row['Focus KPIs']

            prompt = f"""
You are a senior business analyst.
Analyze the performance gaps for partner '{partner}' under Branch Manager '{bm}'.

Below are the KPIs and their % gaps (higher = more underperformance):
{focus_kpis}

Your task:
- Focus only on quantitative insights — do not generate generic text.
- Use the % gaps to suggest realistic improvement targets (X%) based on the data.
- Estimate how much total branch performance (Y%) could improve if this partner closes these gaps partially or fully.

Generate exactly 3–4 concise insights:
1. Identify the top 2–3 KPIs with highest % gaps.
2. Suggest realistic % improvement targets (X%) and expected partner-level performance gains.
3. Estimate branch-level improvement (Y%) if achieved.
4. End with one actionable recommendation for the Branch Manager.
"""
            llm_insight = self.gemini.generate_insight(prompt, temperature=0.7)
            text += f"\nPartner: {partner}\n"
            text += llm_insight + "\n" + ("-" * 100) + "\n"

        return text

# ================= MAIN INSIGHT ORCHESTRATOR =================
class InsightOrchestrator:
    """Orchestrates all insight generation across hierarchy levels"""
    
    def __init__(self, gemini_api_key: str, config: InsightConfig = None):
        self.config = config or InsightConfig()
        self.gemini_service = GeminiService(gemini_api_key)
        
        # Initialize all generators
        self.concentration_gen = ConcentrationInsightGenerator(self.gemini_service, self.config)
        self.leaders_laggers_gen = LeadersLaggersInsightGenerator(self.gemini_service, self.config)
        self.drastic_change_gen = DrasticChangeInsightGenerator(self.gemini_service, self.config)
        self.focus_area_gen = FocusAreaInsightGenerator(self.gemini_service, self.config)
        
        self.logger = logging.getLogger(__name__)
    
    def generate_all_insights(self, df: pd.DataFrame) -> Dict:
        """Generate all types of insights across all hierarchy levels"""
        all_insights = {}
        
        for hierarchy_level in self.config.hierarchy_levels:
            if hierarchy_level not in df.columns:
                self.logger.warning(f"Hierarchy level {hierarchy_level} not found in dataframe")
                continue
                
            level_insights = {}
            
            # Generate insights for each KPI at this hierarchy level
            for kpi in self.config.kpi_list:
                if kpi not in df.columns:
                    continue
                    
                kpi_insights = {}
                
                # 1. Partner Concentration
                try:
                    kpi_insights['concentration'] = self.concentration_gen.generate_manager_concentration(
                        df, kpi, hierarchy_level
                    )
                except Exception as e:
                    self.logger.error(f"Concentration insight error for {kpi}: {e}")
                    kpi_insights['concentration'] = {}
                
                # 2. Leaders & Laggers
                try:
                    kpi_insights['leaders_laggers'] = self.leaders_laggers_gen.generate_manager_leaders_laggers(
                        df, kpi, hierarchy_level
                    )
                except Exception as e:
                    self.logger.error(f"Leaders/Laggers insight error for {kpi}: {e}")
                    kpi_insights['leaders_laggers'] = {}
            
            level_insights['kpi_insights'] = kpi_insights
            
            # 3. Drastic Changes (BM level only)
            if hierarchy_level == 'BM':
                try:
                    level_insights['drastic_changes'] = self.drastic_change_gen.generate_change_insights(df)
                except Exception as e:
                    self.logger.error(f"Drastic change insight error: {e}")
                    level_insights['drastic_changes'] = {}
            
            # 4. Focus Areas (BM level only)
            if hierarchy_level == 'BM':
                try:
                    level_insights['focus_areas'] = self.focus_area_gen.generate_focus_insights(df)
                except Exception as e:
                    self.logger.error(f"Focus area insight error: {e}")
                    level_insights['focus_areas'] = {}
            
            all_insights[hierarchy_level] = level_insights
            
        return all_insights
    
    def display_insights(self, insights: Dict):
        """Display insights in formatted output"""
        for hierarchy_level, level_insights in insights.items():
            print(f"\n{'='*80}")
            print(f"INSIGHTS FOR {hierarchy_level} LEVEL")
            print(f"{'='*80}")
            
            # Display KPI-based insights
            if 'kpi_insights' in level_insights:
                for insight_type, kpi_data in level_insights['kpi_insights'].items():
                    print(f"\n{insight_type.upper()} INSIGHTS:")
                    for kpi, manager_insights in kpi_data.items():
                        print(f"\n--- {kpi} ---")
                        for manager, insight in manager_insights.items():
                            print(f"\n{manager}:\n{insight}")
                            print("-" * 60)
            
            # Display other insights
            for insight_type in ['drastic_changes', 'focus_areas']:
                if insight_type in level_insights:
                    print(f"\n{insight_type.upper()} INSIGHTS:")
                    for manager, insight in level_insights[insight_type].items():
                        print(f"\n{manager}:\n{insight}")
                        print("-" * 60)

# ================= USAGE EXAMPLE =================
def main():
    """Example usage of the insight generation system"""
    
    # Configuration
    config = InsightConfig(
        kpi_list=['Equity Sales', 'SIP Sales Achievement'],
        hierarchy_levels=['ZM', 'BM', 'RM'],
        top_managers=3,
        top_partners=2,
        change_threshold=50.0
    )
    
    # Initialize orchestrator
    orchestrator = InsightOrchestrator(gemini_api_key=config.google, config=config)
    
    # Generate all insights
    all_insights = orchestrator.generate_all_insights(final_data)
    
    # Display insights
    orchestrator.display_insights(all_insights)
    
    return all_insights

if __name__ == "__main__":
    # Setup logging
    logging.basicConfig(level=logging.INFO)
    
    # Run insight generation
    insights = main()

2025-11-10 17:19:16,208 - __main__ - ERROR - Concentration insight error for Equity Sales: 'Partner Name'
2025-11-10 17:19:16,292 - __main__ - ERROR - Leaders/Laggers insight error for Equity Sales: 'Partner Name'
2025-11-10 17:19:16,384 - __main__ - ERROR - Concentration insight error for SIP Sales Achievement: 'Partner Name'
2025-11-10 17:19:16,474 - __main__ - ERROR - Leaders/Laggers insight error for SIP Sales Achievement: 'Partner Name'
2025-11-10 17:19:16,544 - __main__ - ERROR - Concentration insight error for Equity Sales: 'Partner Name'
2025-11-10 17:19:16,578 - __main__ - ERROR - Leaders/Laggers insight error for Equity Sales: 'Partner Name'
2025-11-10 17:19:16,617 - __main__ - ERROR - Concentration insight error for SIP Sales Achievement: 'Partner Name'
2025-11-10 17:19:16,648 - __main__ - ERROR - Leaders/Laggers insight error for SIP Sales Achievement: 'Partner Name'
2025-11-10 17:19:16,661 - __main__ - ERROR - Drastic change insight error: 'Partner Name'
2025-11-10 17:19:1

### BM LEVEL Insights

###Partner Concentration

In [206]:
import pandas as pd
import google.generativeai as genai

# ================= GEMINI SETUP =================
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    """Uses Gemini LLM to generate descriptive & prescriptive insights dynamically."""
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str) -> str:
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            return f"Error generating insight: {e}"


# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]

thresholds = [25, 50, 75, 80, 90]


# ================= BASE FUNCTION =================
def partner_concentration(df, kpi, thresholds=thresholds):
    df_sorted = (
        df.groupby('Partner Name')[kpi]
        .sum()
        .sort_values(ascending=False)
        .reset_index()
    )

    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100

    total_partners = len(df_sorted)
    conc_summary = {}

    for t in thresholds:
        # Find how many partners are needed to reach or exceed t% of total KPI
        num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
        conc_summary[f'Partners for {t}%'] = num_partners
        conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)

    return conc_summary, total_partners, df_sorted

# ================= BRANCH MANAGER CONCENTRATION (TOP 5 BMs) =================
def top_branch_manager_concentration(df, kpi, top_bms=5, top_partners=2, thresholds=thresholds):
    bm_insights = {}

    # Find top 5 BMs by total KPI
    top_bms_list = (
        df.groupby('BM')[kpi]
        .sum()
        .sort_values(ascending=False)
        .head(top_bms)
        .index
        .tolist()
    )

    for bm in top_bms_list:
        group = df[df['BM'] == bm]
        conc_summary, total_partners, df_sorted = partner_concentration(group, kpi, thresholds)
        
        # === Create the readable concentration text (like your example) ===
        bm_text = f"Branch Manager: {bm}\nKPI: {kpi}\n"
        bm_text += f"Total Partners under {bm}: {total_partners}\n\n"
        bm_text += f"Partner Concentration Insight for KPI '{kpi}':\n"
        for t in thresholds:
            bm_text += (
                f" - Top {conc_summary[f'Partners for {t}%']} partners "
                f"({conc_summary[f'% of Total Partners for {t}%']}% of total partners) "
                f"contribute {t}% of total {kpi}.\n"
            )

        # === Show top 2 partners for context ===
        top2 = df_sorted.head(top_partners)[['Partner Name', kpi, 'Cumulative KPI %']]
        bm_text += "\nTop 2 Partners (for reference):\n"
        for _, row in top2.iterrows():
            bm_text += f" - {row['Partner Name']}: {row[kpi]:,.2f} ({row['Cumulative KPI %']:.2f}% cumulative)\n"

        bm_insights[bm] = bm_text

    return bm_insights


# ================= GEMINI INSIGHTS =================
def generate_branch_manager_deep_insights(df, kpi_list, generator, top_bms=5, top_partners=2):
    bm_all_insights = {}

    for kpi in kpi_list:
        bm_summaries = top_branch_manager_concentration(df, kpi, top_bms, top_partners)

        for bm, text in bm_summaries.items():
            prompt = f"""
            You are a financial performance analyst.
            Below is data for Branch Manager '{bm}' on KPI '{kpi}' showing partner concentration.
            Analyze how concentrated performance is (few vs many partners contributing).
            Write 3–4 crisp insights about:
            1. Level of concentration (high/medium/low),
            2. Implications on business dependency,
            3. Partner development or risk recommendations.

            Data:
            {text}
            """
            insight = generator.generate_insight(prompt)
            bm_all_insights.setdefault(bm, {})[kpi] = text + "\n\nLLM Insights:\n" + insight

    return bm_all_insights


# ================= RUN =================
generator = DeepInsightGenerator()

branch_manager_insights = generate_branch_manager_deep_insights(
    merged_with_hierarchyy, kpi_list, generator, top_bms=5, top_partners=2
)

# ================= DISPLAY =================
for bm, kpis in branch_manager_insights.items():
    print(f"\n######## Branch Manager: {bm} ########")
    for kpi, insight in kpis.items():
        print(f"\n=== {kpi} ===")
        print(insight)
        print("-" * 80)
        



######## Branch Manager: SARFARAJ YAFAI ########

=== Equity Sales ===
Branch Manager: SARFARAJ YAFAI
KPI: Equity Sales
Total Partners under SARFARAJ YAFAI: 417

Partner Concentration Insight for KPI 'Equity Sales':
 - Top 4 partners (0.96% of total partners) contribute 25% of total Equity Sales.
 - Top 12 partners (2.88% of total partners) contribute 50% of total Equity Sales.
 - Top 33 partners (7.91% of total partners) contribute 75% of total Equity Sales.
 - Top 39 partners (9.35% of total partners) contribute 80% of total Equity Sales.
 - Top 62 partners (14.87% of total partners) contribute 90% of total Equity Sales.

Top 2 Partners (for reference):
 - VND WEALTH PRIVATE LIMITED: 2,871,847,343.08 (8.24% cumulative)
 - F2 FUNDS PRIVATE LIMITED: 2,318,821,040.14 (14.90% cumulative)


LLM Insights:
Here are 3 key insights regarding Sarfaraj Yafai's Equity Sales performance, based on partner concentration:

1.  **High Concentration:** The Equity Sales performance under Sarfaraj Yafa

In [32]:
merged_with_hiaerarchyy.head(2)

NameError: name 'merged_with_hierarchyy' is not defined

In [238]:
merged_with_hierarchyy.head(2)

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center_x,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name,Doer Type,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,ZM,SRM,RM,BM
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0,4999.76,1.67,17.82,E-Surat,,MANOJ PATEL,DENISH M. PATEL
1,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0,4999.76,1.67,17.82,E-Surat,,MANOJ PATEL,DENISH M. PATEL


In [117]:
import json
print(json.dumps(branch_manager_insights["SARFARAJ YAFAI"], indent=2)[:1000])


{
  "Equity Sales": "Branch Manager: SARFARAJ YAFAI\nKPI: Equity Sales\nTotal Partners under SARFARAJ YAFAI: 417\n\nPartner Concentration Insight for KPI 'Equity Sales':\n - Top 3 partners (0.72% of total partners) contribute 25% of total Equity Sales.\n - Top 11 partners (2.64% of total partners) contribute 50% of total Equity Sales.\n - Top 32 partners (7.67% of total partners) contribute 75% of total Equity Sales.\n - Top 38 partners (9.11% of total partners) contribute 80% of total Equity Sales.\n - Top 61 partners (14.63% of total partners) contribute 90% of total Equity Sales.\n\nTop 2 Partners (for reference):\n - VND WEALTH PRIVATE LIMITED: 2,871,847,343.08 (8.24% cumulative)\n - F2 FUNDS PRIVATE LIMITED: 2,318,821,040.14 (14.90% cumulative)\n\n\nLLM Insights:\nHere's an analysis of Sarfaraj Yafai's Equity Sales partner concentration:\n\n**Insights:**\n\n1.  **High Concentration:** Sarfaraj Yafai's Equity Sales exhibit a high degree of partner concentration. A small percentage 

In [207]:
import re
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

def extract_partner_name(text):
    """
    Extracts the first partner name from the 'Top 2 Partners' section.
    Handles variations in spacing and punctuation.
    """
    match_section = re.search(r"Top\s+\d+\s+Partners.*?:([\s\S]*)", text)
    if match_section:
        section_text = match_section.group(1)
        partner_matches = re.findall(r"-\s*([A-Za-z0-9&\.\'\s]+):", section_text)
        if partner_matches:
            return partner_matches[0].strip()

    return "Unknown Partner"


def save_insights_to_mysql(insight_dict, insight_type="General_Insight"):
    records = []

    for bm, nested_dict in insight_dict.items():
        if isinstance(nested_dict, dict):
            for _, insight_text in nested_dict.items():
                partner_name = extract_partner_name(insight_text)
                records.append({
                    "BM": bm,
                    "Partner_Name": partner_name,
                    "Insight_Type": insight_type,
                    "Insight": str(insight_text),
                })
        else:
            records.append({
                "BM": bm,
                "Partner_Name": "Unknown Partner",
                "Insight_Type": insight_type,
                "Insight": str(nested_dict),
            })

    df_to_save = pd.DataFrame(records)

    df_to_save.to_sql(
        name="AllInsights",
        con=engine,
        if_exists="append",
        index=False
    )
    print(f" Saved {len(df_to_save)} insights. Unknown Partners: {sum(df_to_save['Partner_Name']=='Unknown Partner')}")


In [208]:
save_insights_to_mysql(branch_manager_insights, "Partner_Concentration")


 Saved 20 insights. Unknown Partners: 0


  df_to_save.to_sql(


In [102]:
merged_with_hierarchyy.head()

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center_x,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name,Doer Type,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,ZM,SRM,RM,BM
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,E-Surat,,MANOJ PATEL,DENISH M. PATEL
1,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,E-Surat,,MANOJ PATEL,DENISH M. PATEL
2,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,E-Surat,,MANOJ PATEL,DENISH M. PATEL
3,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,E-Surat,,MANOJ PATEL,DENISH M. PATEL
4,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023,3,24695,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,E-Surat,,MANOJ PATEL,DENISH M. PATEL


In [57]:
import pandas as pd
import time
import google.generativeai as genai
from collections import defaultdict

# ================= GEMINI SETUP =================
genai.configure(api_key=google_api_key)

# Optional fallback (if Gemini quota exceeds)
USE_FALLBACK = True
try:
    from openai import OpenAI
    openai_client = OpenAI(api_key=google_api_key)  # Add your OpenAI key if needed
except ImportError:
    USE_FALLBACK = False


# ================= CLASS =================
class DeepInsightGenerator:
    """Uses Gemini LLM (with fallback) to generate short insights."""
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model_name = model_name
        self.model = genai.GenerativeModel(model_name)
        self.cache = {}  # cache prompts to avoid duplicate calls

    def generate_insight(self, prompt: str) -> str:
        if prompt in self.cache:
            return self.cache[prompt]

        try:
            response = self.model.generate_content(prompt)
            text = response.text.strip()
        except Exception as e:
            error_msg = str(e)
            if "429" in error_msg and USE_FALLBACK:
                # fallback to OpenAI if Gemini quota exceeded
                print("⚠️ Gemini quota hit — switching to GPT-4o-mini fallback.")
                text = self._generate_with_fallback(prompt)
            else:
                text = f"Error generating insight: {e}"

        self.cache[prompt] = text
        time.sleep(0.5)  # small delay to avoid hitting rate limit
        return text

    def _generate_with_fallback(self, prompt: str) -> str:
        try:
            response = openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=250
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            return f"Error with fallback model: {e}"


# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]


# ================= LEADERS & LAGGERS =================
def branch_manager_leaders_laggers(df, kpi, top_bms=3, top_n=2):
    bm_results = {}
    top_bms_list = (
        df.groupby('BM')[kpi]
        .sum()
        .sort_values(ascending=False)
        .head(top_bms)
        .index
        .tolist()
    )

    for bm in top_bms_list:
        bm_df = df[df['BM'] == bm]
        perf = (
            bm_df.groupby('Partner Name')[kpi]
            .sum()
            .sort_values(ascending=False)
            .reset_index()
        )
        total_perf = perf[kpi].sum()

        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi] / total_perf * 100, 2)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi] / total_perf * 100, 2)

        summary_text = (
            f"Branch Manager: {bm}\nKPI: {kpi}\n\n"
            f"Total Partners: {len(perf)}\n"
            f"Top {top_n} Leaders contribute: {leaders['% Share'].sum():.2f}%\n"
            f"Bottom {top_n} Laggers contribute: {laggers['% Share'].sum():.2f}%\n\n"
            "Top Partners:\n" +
            "\n".join(f" - {r['Partner Name']}: {r[kpi]:,.2f} ({r['% Share']}%)" for _, r in leaders.iterrows()) +
            "\n\nLagging Partners:\n" +
            "\n".join(f" - {r['Partner Name']}: {r[kpi]:,.2f} ({r['% Share']}%)" for _, r in laggers.iterrows())
        )

        bm_results[bm] = summary_text
    return bm_results


# ================= GEMINI INSIGHT GENERATION =================
def generate_leaders_laggers_insights(df, kpi_list, generator, top_bms=3, top_n=2):
    all_bm_insights = defaultdict(dict)

    for kpi in kpi_list:
        bm_summaries = branch_manager_leaders_laggers(df, kpi, top_bms, top_n)

        for bm, text in bm_summaries.items():
            prompt = f"""
                    You are a financial analyst.
                    Below is partner concentration data for Branch Manager: {bm}, KPI: {kpi}.
                    
                    Write **2–3 short bullet insights (max 150 words)** about:
                    1. Whether performance is dominated by a few or many partners,
                    2. Impact of leaders on total performance.
                    
                    Keep it concise and business-focused.
                    
                    Data:
                    {text}
                    """
            insight = generator.generate_insight(prompt)
            all_bm_insights[bm][kpi] = f"{text}\n\nLLM Insights:\n{insight}"

    return all_bm_insights


# ================= EXECUTION =================
generator = DeepInsightGenerator()
leaders_laggers_insights = generate_leaders_laggers_insights(
    merged_with_hierarchyy, kpi_list, generator, top_bms=3, top_n=2
)

# ================= DISPLAY =================
for bm, kpis in leaders_laggers_insights.items():
    print(f"\n######## Branch Manager: {bm} ########")
    for kpi, insight in kpis.items():
        print(f"\n=== {kpi} ===")
        print(insight)
        print("-" * 80)



######## Branch Manager: SARFARAJ YAFAI ########

=== Equity Sales ===
Branch Manager: SARFARAJ YAFAI
KPI: Equity Sales

Total Partners: 417
Top 2 Leaders contribute: 14.90%
Bottom 2 Laggers contribute: -2.59%

Top Partners:
 - VND WEALTH PRIVATE LIMITED: 2,871,847,343.08 (8.241395143071161%)
 - F2 FUNDS PRIVATE LIMITED: 2,318,821,040.14 (6.654365004431456%)

Lagging Partners:
 - Sujit Jevatlal Shah: -439,067,724.44 (-1.260001030485945%)
 - RASHMI SHREYANSH SANGHANI: -464,129,380.38 (-1.3319209429558476%)

LLM Insights:
Here are a few key insights from the partner concentration data for Branch Manager Sarfraj Yafai:

*   **Performance Concentration:** Equity Sales are moderately concentrated, with the top two partners contributing 14.9% of the total. While not excessively dominant, their performance has a notable influence on the overall results.

*   **Leader Impact:** The Top 2 Leaders contribute significantly to the upside while the bottom two Laggers are heavily weighing down perf

In [58]:
merged_with_hierarchyy.shape

(131950, 53)

In [59]:
merged_df.shape

(81001, 49)

### BN Level (Leaders & Laggers)

In [137]:
import pandas as pd
import google.generativeai as genai

# ================= GEMINI SETUP =================
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    """Uses Gemini LLM to generate descriptive & prescriptive insights dynamically."""
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str) -> str:
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            return f"Error generating insight: {e}"


# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]


# ================= LEADERS & LAGGERS =================
def branch_manager_leaders_laggers(df, kpi, top_bms=5, top_n=3):
    """
    For each of the top BMs (by KPI total), find top and bottom partners
    and quantify their share of performance.
    """
    bm_results = {}

    # Top 5 BMs overall for that KPI
    top_bms_list = (
        df.groupby('BM')[kpi]
        .sum()
        .sort_values(ascending=False)
        .head(top_bms)
        .index
        .tolist()
    )

    for bm in top_bms_list:
        bm_df = df[df['BM'] == bm]

        # Sort partners under that BM
        perf = (
            bm_df.groupby('Partner Name')[kpi]
            .sum()
            .sort_values(ascending=False)
            .reset_index()
        )

        total_perf = perf[kpi].sum()

        # Leaders (Top N)
        leaders = perf.head(top_n).copy()
        leaders['% Share'] = round(leaders[kpi] / total_perf * 100, 2)

        # Laggers (Bottom N)
        laggers = perf.tail(top_n).copy()
        laggers['% Share'] = round(laggers[kpi] / total_perf * 100, 2)

        # Performance summary
        leaders_contrib = leaders['% Share'].sum()
        laggers_contrib = laggers['% Share'].sum()

        summary_text = f"Branch Manager: {bm}\nKPI: {kpi}\n\n"
        summary_text += f"Total Partners under {bm}: {len(perf)}\n"
        summary_text += f"Top {top_n} Leaders contribute: {leaders_contrib:.2f}% of total {kpi}\n"
        summary_text += f"Bottom {top_n} Laggers contribute: {laggers_contrib:.2f}% of total {kpi}\n\n"

        summary_text += "Top Performing (Leader) Partners:\n"
        for _, row in leaders.iterrows():
            summary_text += f" - {row['Partner Name']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"

        summary_text += "\nLow Performing (Lagger) Partners:\n"
        for _, row in laggers.iterrows():
            summary_text += f" - {row['Partner Name']}: {row[kpi]:,.2f} ({row['% Share']}%)\n"

        bm_results[bm] = summary_text

    return bm_results


# ================= GEMINI INSIGHT GENERATION =================
def generate_leaders_laggers_insights(df, kpi_list, generator, top_bms=5, top_n=3):
    all_bm_insights = {}

    for kpi in kpi_list:
        bm_summaries = branch_manager_leaders_laggers(df, kpi, top_bms, top_n)

        for bm, text in bm_summaries.items():
            prompt = f"""
            You are a financial performance analyst.
            Below is data for Branch Manager '{bm}' on KPI '{kpi}' showing top and bottom performing partners.

            Write a professional 3–4 bullet point insight covering:
            1. Performance dependency (e.g., dominated by few top partners or evenly spread),
            2. Impact of leaders on total performance,
            3. Weak link from laggers and improvement recommendations,
            4. Any early warning or strategic focus points.

            Data:
            {text}
            """
            insight = generator.generate_insight(prompt)
            all_bm_insights.setdefault(bm, {})[kpi] = text + "\n\nLLM Insights:\n" + insight

    return all_bm_insights


# ================= EXECUTION =================
generator = DeepInsightGenerator()

leaders_laggers_insights = generate_leaders_laggers_insights(
    merged_with_hierarchyy, kpi_list, generator, top_bms=5, top_n=3
)


# ================= DISPLAY =================
for bm, kpis in leaders_laggers_insights.items():
    print(f"\n######## Branch Manager: {bm} ########")
    for kpi, insight in kpis.items():
        print(f"\n=== {kpi} ===")
        print(insight)
        print("-" * 80)



######## Branch Manager: SARFARAJ YAFAI ########

=== Equity Sales ===
Branch Manager: SARFARAJ YAFAI
KPI: Equity Sales

Total Partners under SARFARAJ YAFAI: 417
Top 3 Leaders contribute: 21.42% of total Equity Sales
Bottom 3 Laggers contribute: -3.30% of total Equity Sales

Top Performing (Leader) Partners:
 - VND WEALTH PRIVATE LIMITED: 2,871,847,343.08 (8.241395143071161%)
 - F2 FUNDS PRIVATE LIMITED: 2,318,821,040.14 (6.654365004431456%)
 - SNOWBALL FINANCIAL SERVICES PVT LTD: 2,274,146,547.82 (6.526161760999183%)

Low Performing (Lagger) Partners:
 - Shanthi Ramanujam: -247,375,707.40 (-0.7098987898478141%)
 - Sujit Jevatlal Shah: -439,067,724.44 (-1.260001030485945%)
 - RASHMI SHREYANSH SANGHANI: -464,129,380.38 (-1.3319209429558476%)


LLM Insights:
Here's a performance analysis for Branch Manager Sarfaraj Yafai based on the provided equity sales data:

*   **Concentrated Performance:** Equity sales performance is heavily reliant on a small group of top partners. The top 3 part

In [209]:
save_leader_lagger_insights_to_mysql(leaders_laggers_insights, "Leaders_and_Laggers")


Saved 122 leader/lagger insights.
Error saving to MySQL: 'Performance_Type'


  df_to_save.to_sql(


#### sqldb code -leaders & laggers

In [146]:
import re
import pandas as pd
from sqlalchemy import create_engine

# ================== MySQL Connection ==================
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# ================== Helper Function ==================
def extract_top_and_bottom_partners(text):
    """
    Extracts Top Performing and Low Performing partner names from the insight text.
    Returns up to top 3 + bottom 3 partner names.
    """
    # Extract top performing partners
    top_match = re.search(r"Top Performing.*?:([\s\S]*?)Low Performing", text)
    bottom_match = re.search(r"Low Performing.*?:([\s\S]*)", text)

    top_partners, bottom_partners = [], []

    if top_match:
        top_section = top_match.group(1)
        top_partners = re.findall(r"-\s*([A-Za-z0-9&\.\'\s]+):", top_section)

    if bottom_match:
        bottom_section = bottom_match.group(1)
        bottom_partners = re.findall(r"-\s*([A-Za-z0-9&\.\'\s]+):", bottom_section)

    # Clean whitespace
    top_partners = [p.strip() for p in top_partners if p.strip()]
    bottom_partners = [p.strip() for p in bottom_partners if p.strip()]

    return top_partners, bottom_partners


# ================== Save to MySQL ==================
def save_leader_lagger_insights_to_mysql(insight_dict, insight_type="Leaders_and_Laggers"):
    """
    Saves insights for Leaders and Laggers with Partner_Name extracted from text.
    Creates one row per partner (top or bottom performer).
    """
    records = []

    for bm, nested_dict in insight_dict.items():
        if isinstance(nested_dict, dict):
            for _, insight_text in nested_dict.items():
                top_partners, bottom_partners = extract_top_and_bottom_partners(insight_text)

                # Save top performing partners
                for p in top_partners:
                    records.append({
                        "BM": bm,
                        "Partner_Name": p,
                        "Insight_Type": insight_type,
                        "Insight": str(insight_text),
                    })

                # Save low performing partners
                for p in bottom_partners:
                    records.append({
                        "BM": bm,
                        "Partner_Name": p,
                        "Insight_Type": insight_type,
                        "Insight": str(insight_text),
                    })
        else:
            records.append({
                "BM": bm,
                "Partner_Name": "Unknown Partner",
                "Insight_Type": insight_type,
                "Insight": str(nested_dict),
            })

    # Convert to DataFrame
    df_to_save = pd.DataFrame(records)

    # Save to SQL
    try:
        df_to_save.to_sql(
            name="AllInsights",
            con=engine,
            if_exists="append",
            index=False
        )
        print(f"Saved {len(df_to_save)} leader/lagger insights.")
        print(f"Leaders: {sum(df_to_save['Performance_Type']=='Leader')}, Laggers: {sum(df_to_save['Performance_Type']=='Lagger')}")
    except Exception as e:
        print(f"Error saving to MySQL: {e}")


#### Partners changing course / drastic change in performance 


In [127]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================ GEMINI SETUP ================
gemini_api_key = google_api_key  # Replace with your valid Gemini key
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

# ================ CONFIG ================
kpi_cols = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
threshold_pct_change = 50   # % threshold for major YoY change
year_col = 'FY_Year'
partner_col = 'Partner Name'
bm_col = 'BM'
top_bm = 2                  # Top 2 Branch Managers
top_partners = 2            # Top 2 Partners per BM


# ================ STEP 1: DETECT DRASTIC CHANGES ================
def detect_drastic_changes(df):
    """Detect YoY KPI changes (above threshold) per partner per BM."""
    df_sorted = df.sort_values([bm_col, partner_col, year_col])
    change_records = []

    for (bm, partner), group in df_sorted.groupby([bm_col, partner_col]):
        group = group.sort_values(year_col)
        for i in range(1, len(group)):
            prev, curr = group.iloc[i - 1], group.iloc[i]

            for kpi in kpi_cols:
                prev_val, curr_val = prev[kpi], curr[kpi]
                if pd.isna(prev_val) or pd.isna(curr_val) or prev_val == 0:
                    continue
                pct_change = ((curr_val - prev_val) / abs(prev_val)) * 100

                if abs(pct_change) >= threshold_pct_change:
                    change_records.append({
                        'BM': bm,
                        'Partner Name': partner,
                        'KPI': kpi,
                        'Year From': prev[year_col],
                        'Year To': curr[year_col],
                        'Previous Value': round(prev_val, 2),
                        'Current Value': round(curr_val, 2),
                        '% Change': round(pct_change, 2)
                    })

    return pd.DataFrame(change_records)


# ================ STEP 2: SELECT TOP 2 BMs & TOP 2 PARTNERS ================
def get_top_bm_partners(change_df):
    """Get top 2 BMs and within each, top 2 partners with max change magnitude."""
    if change_df.empty:
        return pd.DataFrame()

    # Rank BMs by total magnitude of change
    top_bms = (
        change_df.groupby('BM')['% Change']
        .apply(lambda x: x.abs().mean())
        .sort_values(ascending=False)
        .head(top_bm)
        .index
    )

    top_df = change_df[change_df['BM'].isin(top_bms)]
    final_records = []

    # Within each BM, take top 2 partners
    for bm in top_bms:
        bm_data = top_df[top_df['BM'] == bm]
        top_partners_for_bm = (
            bm_data.groupby('Partner Name')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )
        final_records.append(bm_data[bm_data['Partner Name'].isin(top_partners_for_bm)])

    return pd.concat(final_records)


# ================ STEP 3: GENERATE LLM INSIGHTS ================
def generate_bm_insights(change_df):
    """Generate insights for each BM and its top 2 partners."""
    bm_insights = {}

    if change_df.empty:
        return {"All": "No significant performance changes detected above threshold."}

    for bm, group in change_df.groupby('BM'):
        combined_text = f"\n📊 Branch Manager: {bm}\n"

        # Top 2 partners for this BM
        top_partner_names = (
            group.groupby('Partner Name')['% Change']
            .apply(lambda x: x.abs().mean())
            .sort_values(ascending=False)
            .head(top_partners)
            .index
        )

        for partner in top_partner_names:
            partner_data = group[group['Partner Name'] == partner]

            prompt = f"""
You are a senior financial analyst.
Analyze the KPI performance changes for partner '{partner}' under Branch Manager '{bm}'.

Below is year-over-year data showing KPI shifts beyond ±{threshold_pct_change}%:
{partner_data}

(3–4 bullet points) that :
1. Identifies which KPIs show the largest positive or negative shifts, with actual % changes.
2. Interprets what these changes reveal about the partner’s business direction.
3. Explains how these shifts impact the Branch Manager’s overall portfolio balance.
4. Provides 1–2 quantified, strategic recommendations for the BM — such as stabilizing declines or scaling strong areas.

Tone: Data-driven, concise, and suitable for a sales performance dashboard.
Use ↑ and ↓ symbols for directionality.
Avoid generic phrasing; use the numbers provided.
"""
            try:
                resp = model.generate_content(
                    prompt,
                    generation_config={
                        "temperature": 0.6,
                        "top_p": 0.9,
                        "max_output_tokens": 800
                    }
                )
                combined_text += f"\n🔹 Partner: {partner}\n"
                combined_text += resp.text.strip() + "\n" + ("-" * 100) + "\n"
            except Exception as e:
                combined_text += f"⚠️ Error generating insight for {partner}: {e}\n"

        bm_insights[bm] = combined_text

    return bm_insights


# ================ STEP 4: EXECUTION ================
change_df = detect_drastic_changes(merged_with_hierarchyy)
filtered_df = get_top_bm_partners(change_df)
bm_partner_insights = generate_bm_insights(filtered_df)

# ================ STEP 5: DISPLAY OUTPUT ================
print("\n================= DRASTIC CHANGE SUMMARY (Filtered) =================\n")
print(filtered_df.head(10))

print("\n================= BRANCH MANAGER INSIGHTS =================\n")
for bm, insight in bm_partner_insights.items():
    print(insight)
    print("=" * 120)




                       BM           Partner Name  \
57023  SANTAPURI MAHENDAR      AEJAZ AHMED SHAIK   
57024  SANTAPURI MAHENDAR      AEJAZ AHMED SHAIK   
57025  SANTAPURI MAHENDAR      AEJAZ AHMED SHAIK   
57196  SANTAPURI MAHENDAR  MAHESHWARAM PUTTAPAGA   
57197  SANTAPURI MAHENDAR  MAHESHWARAM PUTTAPAGA   
57198  SANTAPURI MAHENDAR  MAHESHWARAM PUTTAPAGA   
48458      RAVI KACHIWALA   PATEL JIGAR KANTILAL   
48459      RAVI KACHIWALA   PATEL JIGAR KANTILAL   
48460      RAVI KACHIWALA   PATEL JIGAR KANTILAL   
48461      RAVI KACHIWALA   PATEL JIGAR KANTILAL   

                                    KPI Year From Year To  Previous Value  \
57023                      Equity Sales      2023    2024         1499.92   
57024             SIP Sales Achievement      2023    2024         1499.92   
57025  Investment Net Sales Achievement      2023    2024         1499.92   
57196                      Equity Sales      2024    2025            9.05   
57197            Net Sales through MARS 

In [211]:
save_insights_to_mysql(bm_partner_insights, "Sudden Change")

 Saved 2 insights successfully.
 Unknown Partners: 0


  df_to_save.to_sql(


### sqldb - sudden change

In [210]:
##db - sudden change
import re
import pandas as pd
from sqlalchemy import create_engine

# ==================== MYSQL CONNECTION ====================
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")


# ==================== PARTNER NAME EXTRACTION ====================
def extract_partner_name(text):
    """
    Extracts partner name for Partner Concentration, Leaders & Laggers, and Sudden Change insights.
    Handles:
    - 'Partner:' lines
    - 'Top N Partners' sections
    Returns 'Unknown Partner' if no match found.
    """
    if not isinstance(text, str):
        return "Unknown Partner"

    # 1️⃣ Case 1: For 'Sudden Change' type insights → "Partner: XYZ"
    match_partner = re.search(r"Partner:\s*([A-Za-z\s\.\-&']+)", text)
    if match_partner:
        return match_partner.group(1).strip()

    # 2️⃣ Case 2: For 'Top N Partners' pattern → "- Partner Name:"
    match_section = re.search(r"Top\s+\d+\s+Partners.*?:([\s\S]*)", text)
    if match_section:
        section_text = match_section.group(1)
        partner_matches = re.findall(r"-\s*([A-Za-z0-9&\.\'\s]+):", section_text)
        if partner_matches:
            return partner_matches[0].strip()

    # 3️⃣ Fallback
    return "Unknown Partner"


# ==================== SAVE FUNCTION ====================
def save_insights_to_mysql(insight_dict, insight_type="General_Insight"):
    """
    Saves insights into MySQL.
    Extracts Partner_Name dynamically from text using extract_partner_name().
    """
    records = []

    for bm, nested_dict in insight_dict.items():
        if isinstance(nested_dict, dict):
            for _, insight_text in nested_dict.items():
                partner_name = extract_partner_name(insight_text)
                records.append({
                    "BM": bm,
                    "Partner_Name": partner_name,
                    "Insight_Type": insight_type,
                    "Insight": str(insight_text),
                })
        else:
            # If no nested structure
            partner_name = extract_partner_name(str(nested_dict))
            records.append({
                "BM": bm,
                "Partner_Name": partner_name,
                "Insight_Type": insight_type,
                "Insight": str(nested_dict),
            })

    # Convert to DataFrame
    df_to_save = pd.DataFrame(records)

    # Save to MySQL
    try:
        df_to_save.to_sql(
            name="AllInsights",
            con=engine,
            if_exists="append",
            index=False
        )
        print(f" Saved {len(df_to_save)} insights successfully.")
        print(f" Unknown Partners: {sum(df_to_save['Partner_Name']=='Unknown Partner')}")
    except Exception as e:
        print(f" Error saving insights to MySQL: {e}")



##Partners and areas/business dsnumbers to focus on to improve performance optimally 

In [212]:
import pandas as pd
import numpy as np
import google.generativeai as genai

gemini_api_key = google_api_key  
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

kpi_cols = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
partner_col = 'Partner Name'
bm_col = 'BM'
target_col_suffix = 'Target'
top_bm = 3         
top_partners = 2    


# ================ STEP 1: Identify Performance Gaps ================
def identify_focus_areas(df):
    """Calculate % gap vs target or overall average per partner per BM."""
    focus_records = []
    kpi_avg = df[kpi_cols].mean()

    for (bm, partner), group in df.groupby([bm_col, partner_col]):
        partner_row = group.iloc[-1]  # latest record
        gaps = {}

        for kpi in kpi_cols:
            possible_targets = [col for col in df.columns if target_col_suffix in col and kpi in col]
            target_value = partner_row[possible_targets[0]] if possible_targets else kpi_avg[kpi]

            if target_value and not pd.isna(target_value):
                gap_pct = ((target_value - partner_row[kpi]) / target_value) * 100
            else:
                gap_pct = 0

            gaps[kpi] = round(gap_pct, 2)

        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x: x[1], reverse=True)

        focus_records.append({
            'BM': bm,
            'Partner Name': partner,
            'Focus KPIs': sorted_gaps,
            'Max Gap': max_gap
        })

    focus_df = pd.DataFrame(focus_records)
    return focus_df


# ================ STEP 2: Filter Top 3 BMs & Top 2 Partners ================
def get_top_bm_partners(focus_df):
    """Select top 3 BMs and within each, top 2 partners by max gap."""
    bm_rank = (
        focus_df.groupby('BM')['Max Gap']
        .max()
        .sort_values(ascending=False)
        .head(top_bm)
        .index
    )
    filtered = focus_df[focus_df['BM'].isin(bm_rank)]

    # Take top 2 partners for each BM
    top_data = (
        filtered.groupby('BM', group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap', ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    return top_data


# ================ STEP 3: Generate Insights (LLM) ================
def generate_partner_focus_insights(focus_df):
    """Generate LLM-based recommendations per BM and Partner (kept separate)."""
    bm_insights = {}

    for bm, group in focus_df.groupby('BM'):
        bm_insights[bm] = {}  # store partners separately for each BM

        for _, row in group.iterrows():
            partner = row['Partner Name']
            focus_kpis = row['Focus KPIs']

            prompt = f"""
You are a senior business analyst.
Analyze the performance gaps for partner '{partner}' under Branch Manager '{bm}'.

Below are the KPIs and their % gaps (higher = more underperformance):
{focus_kpis}

Your task:
- Focus only on quantitative insights — do not generate generic text.
- Use the % gaps to suggest realistic improvement targets (X%) based on the data.
- Estimate how much total branch performance (Y%) could improve if this partner closes these gaps partially or fully.

Generate exactly 3–4 concise insights:
1. Identify the top 2–3 KPIs or business areas with highest % gaps.
2. For each, specify a realistic % improvement target (X%) and expected partner-level performance gain.
3. Estimate total potential branch performance improvement (Y%) if these improvements occur.
4. End with one actionable recommendation for the Branch Manager.

Output format example:
- Increase [KPI Name] by X% → Partner performance up by Y%.
- If [Partner] improves [KPI] by X%, total branch performance improves by Y%.
- Specific actionable recommendation for BM.
"""
            try:
                resp = model.generate_content(
                    prompt,
                    generation_config={
                        "temperature": 0.7,
                        "top_p": 0.9,
                        "top_k": 40,
                        "max_output_tokens": 800
                    }
                )
                bm_insights[bm][partner] = resp.text.strip()

            except Exception as e:
                bm_insights[bm][partner] = f"Error generating insight: {e}"

    return bm_insights


# ================ STEP 4: EXECUTION ================
focus_df = identify_focus_areas(merged_with_hierarchyy)
filtered_df = get_top_bm_partners(focus_df)
bm_focus_insights = generate_partner_focus_insights(filtered_df)


# ================ STEP 5: DISPLAY ================
print("\n================= TOP PARTNERS & AREAS TO FOCUS ON =================\n")
print(filtered_df.head(10))

print("\n================= BRANCH MANAGER FOCUS INSIGHTS =================\n")
for bm, partners in bm_focus_insights.items():
    print(f"\n######## Branch Manager: {bm} ########\n")
    for partner, insight in partners.items():
        print(f"\nPartner: {partner}\n")
        print(insight)
        print("-" * 100)
    print("=" * 120)


  .apply(lambda x: x.sort_values('Max Gap', ascending=False).head(top_partners))




                                    BM                     Partner Name  \
0                    BIPIN KUMAR SINGH                Aishwarya Agarwal   
1                    BIPIN KUMAR SINGH           Narendra Kumar Jaiswal   
2                       JIGNESH B SHAH            Amish Maheshbhai Shah   
3                       JIGNESH B SHAH                  JAGRUTI A DOSHI   
4  SHAILENDRAKUMAR AVDHESHKUMAR MISHRA  KASHYAPBHAI CHANDRAVADAN PARIKH   
5  SHAILENDRAKUMAR AVDHESHKUMAR MISHRA                 Ashok C Shah HUF   

                                          Focus KPIs   Max Gap  
0  [(SIP Sales Achievement, 13537.39), (Investmen...  13537.39  
1  [(Equity Sales, 487.47), (Investment Net Sales...    487.47  
2  [(Equity Sales, 15352.17), (Investment Net Sal...  15352.17  
3  [(Equity Sales, 3982.16), (Investment Net Sale...   3982.16  
4  [(Investment Net Sales Achievement, 9612.57), ...   9612.57  
5  [(Equity Sales, 2025.16), (Investment Net Sale...   2025.16  



######## Branc

In [241]:
### BM- AREA TO FOCUS

In [213]:
import pandas as pd
import numpy as np
import re
import google.generativeai as genai
from sqlalchemy import create_engine

# ------------------ GEMINI CONFIG ------------------
gemini_api_key = google_api_key  
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

# ------------------ MYSQL CONNECTION ------------------
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# ------------------ COLUMNS CONFIG ------------------
kpi_cols = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement'
]
partner_col = 'Partner Name'
bm_col = 'BM'
target_col_suffix = 'Target'
top_bm = 3
top_partners = 2


# ================ STEP 1: Identify Performance Gaps ================
def identify_focus_areas(df):
    """Calculate % gap vs target or overall average per partner per BM."""
    focus_records = []
    kpi_avg = df[kpi_cols].mean()

    for (bm, partner), group in df.groupby([bm_col, partner_col]):
        partner_row = group.iloc[-1]
        gaps = {}

        for kpi in kpi_cols:
            possible_targets = [col for col in df.columns if target_col_suffix in col and kpi in col]
            target_value = partner_row[possible_targets[0]] if possible_targets else kpi_avg[kpi]

            if target_value and not pd.isna(target_value):
                gap_pct = ((target_value - partner_row[kpi]) / target_value) * 100
            else:
                gap_pct = 0

            gaps[kpi] = round(gap_pct, 2)

        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x: x[1], reverse=True)

        focus_records.append({
            'BM': bm,
            'Partner Name': partner,
            'Focus KPIs': sorted_gaps,
            'Max Gap': max_gap
        })

    return pd.DataFrame(focus_records)


# ================ STEP 2: Filter Top 3 BMs & Top 2 Partners ================
def get_top_bm_partners(focus_df):
    """Select top 3 BMs and within each, top 2 partners by max gap."""
    bm_rank = (
        focus_df.groupby('BM')['Max Gap']
        .max()
        .sort_values(ascending=False)
        .head(top_bm)
        .index
    )
    filtered = focus_df[focus_df['BM'].isin(bm_rank)]

    top_data = (
        filtered.groupby('BM', group_keys=False)
        .apply(lambda x: x.sort_values('Max Gap', ascending=False).head(top_partners))
        .reset_index(drop=True)
    )
    return top_data


# ================ STEP 3: Generate Insights (LLM) ================
def generate_partner_focus_insights(focus_df):
    """Generate LLM-based recommendations per BM and Partner."""
    bm_insights = {}

    for bm, group in focus_df.groupby('BM'):
        bm_insights[bm] = {}

        for _, row in group.iterrows():
            partner = row['Partner Name']
            focus_kpis = row['Focus KPIs']

            prompt = f"""
You are a senior business analyst.
Analyze the performance gaps for partner '{partner}' under Branch Manager '{bm}'.

Below are the KPIs and their % gaps (higher = more underperformance):
{focus_kpis}

Your task:
- Focus only on quantitative insights — do not generate generic text.
- Use the % gaps to suggest realistic improvement targets (X%) based on the data.
- Estimate how much total branch performance (Y%) could improve if this partner closes these gaps partially or fully.

Generate exactly 3–4 concise insights:
1. Identify the top 2–3 KPIs with highest % gaps.
2. Suggest realistic % improvement targets (X%) and expected partner-level performance gains.
3. Estimate branch-level improvement (Y%) if achieved.
4. End with one actionable recommendation for the Branch Manager.
"""
            try:
                resp = model.generate_content(
                    prompt,
                    generation_config={
                        "temperature": 0.7,
                        "top_p": 0.9,
                        "top_k": 40,
                        "max_output_tokens": 800
                    }
                )
                bm_insights[bm][partner] = resp.text.strip()
            except Exception as e:
                bm_insights[bm][partner] = f"Error generating insight: {e}"

    return bm_insights


# ================ STEP 4: Extract Partner Name (Helper) ================
def extract_partner_name(text):
    match = re.search(r"Partner\s*:\s*([A-Za-z\s\.\-']+)", text)
    if match:
        return match.group(1).strip()
    return "Unknown Partner"


# ================ STEP 5: Save Insights to MySQL ================
def save_insights_to_mysql(insight_dict, insight_type="Focus Insight"):
    records = []

    for bm, nested_dict in insight_dict.items():
        for partner, insight_text in nested_dict.items():
            partner_name = extract_partner_name(insight_text) if partner == "Unknown Partner" else partner
            records.append({
                "BM": bm,
                "Partner_Name": partner_name,
                "Insight_Type": insight_type,
                "Insight": insight_text
            })

    df_to_save = pd.DataFrame(records)
    df_to_save.to_sql(name="AllInsights", con=engine, if_exists="append", index=False)
    print(f"✅ Saved {len(df_to_save)} insights successfully to MySQL.")


# ================ STEP 6: EXECUTION ================
focus_df = identify_focus_areas(merged_with_hierarchyy)
filtered_df = get_top_bm_partners(focus_df)
bm_focus_insights = generate_partner_focus_insights(filtered_df)

print("\n================= BRANCH MANAGER FOCUS INSIGHTS =================\n")
for bm, partners in bm_focus_insights.items():
    print(f"\n######## Branch Manager: {bm} ########\n")
    for partner, insight in partners.items():
        print(f"\nPartner: {partner}\n")
        print(insight)
        print("-" * 100)
    print("=" * 120)

# Save all insights into database
save_insights_to_mysql(bm_focus_insights, insight_type="Area to Focus")


  .apply(lambda x: x.sort_values('Max Gap', ascending=False).head(top_partners))





######## Branch Manager: BIPIN KUMAR SINGH ########


Partner: Aishwarya Agarwal

Here are the data-driven insights and recommendations for Partner Aishwarya Agarwal:

1.  **Top Performance Gaps:** The most significant performance gaps are in 'SIP Sales Achievement' (13537.39%) and 'Investment Net Sales Achievement' (284.79%). 'Equity Sales' (264.92%) also needs attention.

2.  **Improvement Targets & Partner Gains:** A realistic target is to improve 'SIP Sales Achievement' by 10% and 'Investment Net Sales Achievement' by 20%. Achieving this could lead to a 15% overall performance gain for Aishwarya Agarwal.

3.  **Branch-Level Impact:** If Aishwarya Agarwal achieves the above improvement targets, the total branch performance could improve by an estimated 5%, assuming Aishwarya's contribution is significant.

4.  **Actionable Recommendation:** Branch Manager BIPIN KUMAR SINGH should prioritize coaching Aishwarya Agarwal on SIP sales strategies and investment product knowledge to add

  df_to_save.to_sql(name="AllInsights", con=engine, if_exists="append", index=False)


In [None]:
save_insights_to_mysql(bm_focus_insights, insight_type="Partner Focus Insight")


In [218]:
merged_with_hierarchyy.head()

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center_x,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name,Doer Type,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,ZM,SRM,RM,BM
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,E-Surat,,MANOJ PATEL,DENISH M. PATEL
1,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,E-Surat,,MANOJ PATEL,DENISH M. PATEL
2,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,E-Surat,,MANOJ PATEL,DENISH M. PATEL
3,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,E-Surat,,MANOJ PATEL,DENISH M. PATEL
4,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023,3,24695,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,E-Surat,,MANOJ PATEL,DENISH M. PATEL


### ZM Level Insights

In [226]:
import pandas as pd
import json

merged_with_hierarchyy_zm = merged_with_hierarchyy.copy()

kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement',
    'Client Acquisition_% Achievement',
    'LAS_% Achievement'
]

def calculate_partner_concentration(df, kpi_list):
    results = {}

    # Loop through each KPI column
    for kpi in kpi_list:
        if kpi not in df.columns:
            print(f" Skipping missing KPI column: {kpi}")
            continue

        # Group by Zonal Manager (ZM)
        for zm, zm_group in df.groupby("ZM"):
            total = zm_group[kpi].sum()

            if total == 0 or pd.isna(total):
                continue  # skip if there's no performance

            # Aggregate Partner-wise KPI under the ZM
            partner_perf = zm_group.groupby("Partner Name")[kpi].sum().sort_values(ascending=False)

            # Calculate cumulative contribution (%)
            cumulative_share = partner_perf.cumsum() / total

            # Count how many partners contribute to 80%
            partner_count_80 = (cumulative_share <= 0.8).sum() + 1

            # Store results
            if zm not in results:
                results[zm] = {}

            results[zm][kpi] = {
                "total_partners": len(partner_perf),
                "partners_for_80_percent": int(partner_count_80),
                "top_3_partners": [
                    {"name": partner, "contribution": float(value)}
                    for partner, value in partner_perf.head(3).items()
                ],
                "concentration_ratio": round((partner_count_80 / len(partner_perf)) * 100, 2)
            }

    return results

partner_concentration_zm = calculate_partner_concentration(merged_with_hierarchyy_zm, kpi_list)




In [227]:
print(json.dumps(partner_concentration_zm, indent=2))

records = []
for zm, kpi_data in partner_concentration_zm.items():
    for kpi, vals in kpi_data.items():
        records.append({
            "ZM": zm,
            "KPI": kpi,
            "Total Partners": vals["total_partners"],
            "Partners for 80%": vals["partners_for_80_percent"],
            "Concentration Ratio (%)": vals["concentration_ratio"],
            "Top 3 Partners": ", ".join([p["name"] for p in vals["top_3_partners"]])
        })

partner_concentration_df = pd.DataFrame(records)
print(partner_concentration_df.head())


{
  "BHARAT KACHHADIA": {
    "Equity Sales": {
      "total_partners": 2423,
      "partners_for_80_percent": 246,
      "top_3_partners": [
        {
          "name": "MANAVI WEALTH PRIVATE LIMITED",
          "contribution": 1238769947.22
        },
        {
          "name": "SNIKITHA FINSERV PRIVATE LIMITED",
          "contribution": 1188206300.88
        },
        {
          "name": "B S PRABHAKAR",
          "contribution": 769827849.42
        }
      ],
      "concentration_ratio": 10.15
    },
    "SIP Sales Achievement": {
      "total_partners": 2423,
      "partners_for_80_percent": 344,
      "top_3_partners": [
        {
          "name": "Vikram Rangnani",
          "contribution": 23730472.060000002
        },
        {
          "name": "PARIMI VENKATA ATREYUDU",
          "contribution": 20200430.0
        },
        {
          "name": "MANAVI WEALTH PRIVATE LIMITED",
          "contribution": 17164781.6
        }
      ],
      "concentration_ratio": 14.2
    

In [78]:
# Correct Gemini (LangChain) setup
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

google_api_key = google_api_key  # keep it secure
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=google_api_key,
    temperature=0.7
)

# Example prompt
prompt = """
You are a business insight generator for retail investment distribution performance.

The data provided shows the **Partner Concentration** for a specific Zonal Manager (ZM).  
It includes how much of total performance (e.g., Equity Sales, SIP Sales, etc.) comes from top-performing partners under that ZM.

Your task:
Write a clear, structured insight summary explaining the partner concentration pattern and business implications.

Follow this structure:

## Zonal Manager: {ZM_NAME}

### Partner Concentration Insight
- Mention how many partners contribute to 25%, 50%, 75%, and 80% of total performance.
- Comment on whether concentration is **high**, **moderate**, or **balanced** based on those percentages.
- Highlight the top 2–3 partners driving the majority of performance.
- Mention the potential **business risk** if concentration is too high.
- Suggest **diversification or partner development** actions if needed.

Keep the tone analytical but simple — suitable for senior management.  
Avoid mentioning missing data or errors.

Input Data:
{partner_concentration_data_for_ZM}

"""

# Generate LLM output
response = llm.invoke(prompt)
print(response.content)


Okay, I'm ready to analyze the partner concentration data and generate the insight summary. Please provide the data in the format requested. I'll replace `{partner_concentration_data_for_ZM}` with the provided data and produce the output.

For example, the input might look like this:

```json
{
  "ZM_NAME": "John Smith",
  "data": {
    "25_percent_contributors": 3,
    "50_percent_contributors": 7,
    "75_percent_contributors": 15,
    "80_percent_contributors": 18,
    "top_partners": [
      {"partner_name": "Alpha Investments", "contribution_percentage": 15},
      {"partner_name": "Beta Financials", "contribution_percentage": 12},
      {"partner_name": "Gamma Advisors", "contribution_percentage": 8}
    ]
  }
}
```

Once I receive the data, I will generate the insight summary.


In [79]:
import pandas as pd
import google.generativeai as genai

# ================= GEMINI SETUP =================
genai.configure(api_key=google_api_key)

class DeepInsightGenerator:
    """Uses Gemini LLM to generate descriptive & prescriptive insights dynamically."""
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str) -> str:
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            return f"Error generating insight: {e}"


# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement',
    'Client Acquisition_% Achievement',
    'LAS_% Achievement'
]

thresholds = [25, 50, 75, 80, 90]


# ================= BASE FUNCTION =================
def partner_concentration(df, kpi, thresholds=thresholds):
    df_sorted = (
        df.groupby('Partner Name')[kpi]
        .sum()
        .sort_values(ascending=False)
        .reset_index()
    )

    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100

    total_partners = len(df_sorted)
    conc_summary = {}

    for t in thresholds:
        # Find how many partners are needed to reach or exceed t% of total KPI
        num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
        conc_summary[f'Partners for {t}%'] = num_partners
        conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)

    return conc_summary, total_partners, df_sorted


# ================= ZONAL MANAGER CONCENTRATION (TOP 3 ZMs) =================
def top_zonal_manager_concentration(df, kpi, top_zms=3, top_bms=2, top_partners=2, thresholds=thresholds):
    zm_dict = {}

    # Find top 3 ZMs by total KPI
    top_zms_list = (
        df.groupby('ZM')[kpi]
        .sum()
        .sort_values(ascending=False)
        .head(top_zms)
        .index
        .tolist()
    )

    for zm in top_zms_list:
        zm_group = df[df['ZM'] == zm]
        zm_summary = {}

        # Find top 2 BMs under each ZM
        top_bms_list = (
            zm_group.groupby('BM')[kpi]
            .sum()
            .sort_values(ascending=False)
            .head(top_bms)
            .index
            .tolist()
        )

        for bm in top_bms_list:
            bm_group = zm_group[zm_group['BM'] == bm]
            conc_summary, total_partners, df_sorted = partner_concentration(bm_group, kpi, thresholds)

            bm_info = {
                "Total Partners": total_partners,
                "Partner Concentration Summary": conc_summary,
                "Top Partners": df_sorted.head(top_partners)[['Partner Name', kpi, 'Cumulative KPI %']].to_dict(orient='records')
            }

            zm_summary[bm] = bm_info

        zm_dict[zm] = zm_summary

    return zm_dict


# ================= GEMINI INSIGHTS =================
def generate_zonal_manager_insights(df, kpi_list, generator, top_zms=3, top_bms=2, top_partners=2):
    zm_all_insights = {}

    for kpi in kpi_list:
        zm_dict = top_zonal_manager_concentration(df, kpi, top_zms, top_bms, top_partners)

        for zm, bm_data in zm_dict.items():
            text_summary = f"Zonal Manager: {zm}\nKPI: {kpi}\n"
            for bm, bm_info in bm_data.items():
                text_summary += f"\nBranch Manager: {bm}\nTotal Partners: {bm_info['Total Partners']}\n"
                for t, val in bm_info["Partner Concentration Summary"].items():
                    text_summary += f"  - {t}: {val}\n"

                text_summary += "\nTop Partners:\n"
                for partner in bm_info["Top Partners"]:
                    text_summary += f"  - {partner['Partner Name']}: {partner[kpi]:,.2f} ({partner['Cumulative KPI %']:.2f}% cumulative)\n"

            prompt = f"""
            You are a zonal performance analyst.
            Below is data for Zonal Manager '{zm}' showing branch managers and partner concentration across KPI '{kpi}'.
            Write 2–3 crisp insights covering:
            1. Performance concentration across BMs and partners,
            2. Risk of dependency on few performers,
            3. Potential improvement actions for low-performing branches.
            
            Data:
            {text_summary}
            """

            insight = generator.generate_insight(prompt)
            zm_all_insights.setdefault(zm, {})[kpi] = {
                "hierarchy_data": bm_data,
                "raw_text": text_summary,
                "llm_insight": insight
            }

    return zm_all_insights


# ================= RUN =================
generator = DeepInsightGenerator()

zonal_manager_insights = generate_zonal_manager_insights(
    merged_with_hierarchyy, kpi_list, generator, top_zms=3, top_bms=2, top_partners=2
)

# ================= DISPLAY =================
for zm, kpis in zonal_manager_insights.items():
    print(f"\n######## Zonal Manager: {zm} ########")
    for kpi, insight_data in kpis.items():
        print(f"\n=== {kpi} ===")
        print(insight_data["raw_text"])
        print("\nLLM Insights:\n", insight_data["llm_insight"])
        print("-" * 100)



######## Zonal Manager: SARFARAZ ABDULLA PATEL ########

=== Equity Sales ===
Zonal Manager: SARFARAZ ABDULLA PATEL
KPI: Equity Sales

Branch Manager: NAKUL D. CHATRE
Total Partners: 25097
  - Partners for 25%: 196
  - % of Total Partners for 25%: 0.78
  - Partners for 50%: 793
  - % of Total Partners for 50%: 3.16
  - Partners for 75%: 2108
  - % of Total Partners for 75%: 8.4
  - Partners for 80%: 2522
  - % of Total Partners for 80%: 10.05
  - Partners for 90%: 3614
  - % of Total Partners for 90%: 14.4

Top Partners:
  - VA Financial Products Distribution LLP: 3,279,036,661.22 (1.20% cumulative)
  - MARZEE MAIDHYOMAH KERAWALA: 2,195,813,474.10 (2.01% cumulative)


LLM Insights:
 Here are a few insights based on the provided data for Zonal Manager Sarfaraz Abdulla Patel, focusing on Nakul D. Chatre's branch and its Equity Sales performance:

*   **Performance Concentration:** Nakul D. Chatre's Equity Sales are concentrated among a relatively small percentage of partners. Approximat

##### Part Conc. -New approach

In [62]:
import pandas as pd
import google.generativeai as genai

# ================= GEMINI SETUP =================
genai.configure(api_key=google_api_key)

class DeepInsightGenerator:
    """Uses Gemini LLM to generate descriptive & prescriptive insights dynamically."""
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str) -> str:
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            return f"Error generating insight: {e}"


# ================= CONFIG =================
kpi_list = [
    'Equity Sales',
    'SIP Sales Achievement',
    'Net Sales through MARS',
    'Investment Net Sales Achievement',
    'Client Acquisition_% Achievement',
    'LAS_% Achievement'
]

thresholds = [25, 50, 75, 80, 90]


# ================= BASE FUNCTION =================
def partner_concentration(df, kpi, thresholds=thresholds):
    df_sorted = (
        df.groupby('Partner Name')[kpi]
        .sum()
        .sort_values(ascending=False)
        .reset_index()
    )

    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100

    total_partners = len(df_sorted)
    conc_summary = {}

    for t in thresholds:
        # Find how many partners are needed to reach or exceed t% of total KPI
        num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
        conc_summary[f'Partners for {t}%'] = num_partners
        conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)

    return conc_summary, total_partners, df_sorted


# ================= ZONAL MANAGER CONCENTRATION =================
def top_zonal_manager_concentration(df, kpi, top_zms=3, top_bms=2, top_partners=2, thresholds=thresholds):
    zm_dict = {}

    # Find top ZMs by total KPI
    top_zms_list = (
        df.groupby('ZM')[kpi]
        .sum()
        .sort_values(ascending=False)
        .head(top_zms)
        .index
        .tolist()
    )

    for zm in top_zms_list:
        zm_group = df[df['ZM'] == zm]
        zm_summary = {}

        # Find top BMs under each ZM
        top_bms_list = (
            zm_group.groupby('BM')[kpi]
            .sum()
            .sort_values(ascending=False)
            .head(top_bms)
            .index
            .tolist()
        )

        for bm in top_bms_list:
            bm_group = zm_group[zm_group['BM'] == bm]
            conc_summary, total_partners, df_sorted = partner_concentration(bm_group, kpi, thresholds)

            bm_info = {
                "Total Partners": total_partners,
                "Partner Concentration Summary": conc_summary,
                "Top Partners": df_sorted.head(top_partners)[['Partner Name', kpi, 'Cumulative KPI %']].to_dict(orient='records')
            }

            zm_summary[bm] = bm_info

        zm_dict[zm] = zm_summary

    return zm_dict


# ================= GEMINI INSIGHTS =================
def generate_zonal_manager_insights(df, kpi_list, generator, top_zms=3, top_bms=2, top_partners=2, thresholds=thresholds):
    zm_all_insights = {}

    for kpi in kpi_list:
        zm_dict = top_zonal_manager_concentration(df, kpi, top_zms, top_bms, top_partners, thresholds)

        for zm, bm_data in zm_dict.items():
            text_summary = f"Zonal Manager: {zm}\nKPI: {kpi}\n"

            # --- Compact BM summary (Dynamic Thresholds) ---
            for bm, bm_info in bm_data.items():
                conc = bm_info["Partner Concentration Summary"]
                total = bm_info["Total Partners"]

                parts = []
                for t in thresholds:
                    partners = conc.get(f'Partners for {t}%', '-')
                    pct_total = conc.get(f'% of Total Partners for {t}%', '-')
                    parts.append(f"{partners} ({pct_total}%) drive {t}%")

                text_summary += f"\n{bm}: {total} partners → " + ", ".join(parts) + ".\n"

                # Add top partner info
                top_partners_data = bm_info["Top Partners"]
                top_lines = [
                    f"{p['Partner Name']} ({p['Cumulative KPI %']:.2f}% cumulative)"
                    for p in top_partners_data
                ]
                text_summary += f"Top Partners: {', '.join(top_lines)}\n"

            # --- Generate LLM Insights ---
            prompt = f"""
            You are a zonal performance analyst.
            Below is data for Zonal Manager '{zm}' showing branch managers and partner concentration across KPI '{kpi}'.
            Write 2–3 crisp insights covering:
            1. Performance concentration across BMs and partners,
            2. Risk of dependency on few performers,
            3. Potential improvement actions for low-performing branches.

            Data:
            {text_summary}
            """

            insight = generator.generate_insight(prompt)
            zm_all_insights.setdefault(zm, {})[kpi] = {
                "hierarchy_data": bm_data,
                "raw_text": text_summary,
                "llm_insight": insight
            }

    return zm_all_insights


# ================= RUN =================
generator = DeepInsightGenerator()

zonal_manager_insights = generate_zonal_manager_insights(
    merged_with_hierarchyy, kpi_list, generator, top_zms=3, top_bms=2, top_partners=2
)

# ================= DISPLAY =================
for zm, kpis in zonal_manager_insights.items():
    print(f"\n######## Zonal Manager: {zm} ########")
    for kpi, insight_data in kpis.items():
        print(f"\n=== {kpi} ===")
        print(insight_data["raw_text"])
        print("\nLLM Insights:\n", insight_data["llm_insight"])
        print("-" * 100)



######## Zonal Manager: SARFARAZ ABDULLA PATEL ########

=== Equity Sales ===
Zonal Manager: SARFARAZ ABDULLA PATEL
KPI: Equity Sales

AJINKYA BHIMRAO GURAV: 424 partners → 5 (1.18%) drive 25%, 14 (3.3%) drive 50%, 43 (10.14%) drive 75%, 53 (12.5%) drive 80%, 86 (20.28%) drive 90%.
Top Partners: ARVIND S LATKAR (7.95% cumulative), VINODKUMAR K UPADHYAY (15.70% cumulative)

RAVIKIRAN GOVIND MORE: 348 partners → 5 (1.44%) drive 25%, 18 (5.17%) drive 50%, 50 (14.37%) drive 75%, 59 (16.95%) drive 80%, 85 (24.43%) drive 90%.
Top Partners: Shilpa Umesh Bagdi (6.61% cumulative), Sarika Rohit Chandak (12.23% cumulative)


LLM Insights:
 Here are a few insights based on the Equity Sales performance data for Zonal Manager SARFARAZ ABDULLA PATEL's team:

1.  **High Performance Concentration:** In both branches managed by AJINKYA BHIMRAO GURAV and RAVIKIRAN GOVIND MORE, a small percentage of partners drive a significant portion of the Equity Sales. For example, in Ajinkya's branch, just 1.18% of 

In [218]:
def clean_broker_code(df):
    df['Broker Code'] = (
        df['Broker Code']
        .astype(str)
        .str.strip()                # remove leading/trailing spaces
        .str.replace(r'\.0$', '', regex=True)  # remove trailing .0
        .str.replace(r'[^0-9A-Za-z]', '', regex=True)  # remove stray characters
    )
    return df

merged_df = clean_broker_code(merged_df)
MIS_total = clean_broker_code(MIS_total)


#### ZM insights(all columns)   -- MIS data combined

In [53]:
merged_with_hierarchy_mis= pd.merge(
    merged_df,
    MIS_total,
    on='Broker Code',
    how='left'
)

In [54]:
merged_with_hierarchy_mis.head()

Unnamed: 0,Sr No.,Partner Code,Partner Name_x,Center_x,Category_x,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year_x,Sr No,Broker Code,Doer Name_x,Doer Type_x,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement_x,ZM,SRM,RM,BM,Partner Name_y,Category_y,Doer Name_y,Doer Type_y,Center,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement_y,Status,FY_Year_y,Total AUM FY 24-25 Q4 YTD,Equity AUM FY 24-25 Q4 YTD,LIVE SIP FY 24-25 Q4 YTD,Total Net Sales\nFY 24-25 Q4 YTD,Equity Net Sales\nFY 24-25 Q4 YTD,Net SIP\nFY 24-25 Q4 YTD,MARS AUM FY 24-25 Q4 YTD,MARS Net Sales FY 24-25 Q4 YTD,PMS AUM FY 24-25 Q4 YTD,Net Sales FY 24-25 Q4 YTD,Clients Acquired FY 24-25 Q4 YTD,Live Accounts (Non-D) FY 24-25 Q4 YTD,Saturday School (YTD) FY 24-25 Q4 YTD,Investment\nSaturday School (YTD) FY 24-25 Q4 YTD,Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD,Total Group FY 24-25 Q4 YTD,Group Covered FY 24-25 Q4 YTD,Total Reviews\nFY 24-25 Q4 YTD,Non-NJ AUM\nFY 24-25 Q4 YTD,Amount FY 24-25 Q4 YTD,Flexicap Target\nFY 24-25 Q4 YTD,Flexicap Ach\nFY 24-25 Q4 YTD,AMC NS Target\nFY 24-25 Q4 YTD,AMC NS Ach\nFY 24-25 Q4 YTD,Target Qty (FY 24-25 Q4 YTD)\n(in Cr),Order Qty\n(FY 24-25 Q4 YTD)\n(in Cr),MARS TO EQ AUM
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,E-Surat,,MANOJ PATEL,DENISH M. PATEL,ALOKE CHATTERJEE,D,YOGESHBHAI H. ANJARA,UNIT MANAGER,E - SURAT,16,16,1957855.85,1875223.02,9999.51,1600911.06,1524923.86,4999.75,1878565.0,1615000.0,0.0,0.0,4.0,4.0,,,,,,,,,100000.0,,,,,,,98.16,,8.17,16.67,200,59.44,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0.0,4999.76,1.67,17.82,E-Surat,,MANOJ PATEL,DENISH M. PATEL,ALOKE CHATTERJEE,D,YOGESHBHAI H. ANJARA,UNIT MANAGER,E - SURAT,28,28,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,7.83,,11.99,23.53,200,38.25,MUTUAL FUND-Active,2025,2353909.98,2320608.98,20998.98,228882.761,281902.868,10999.47,2144205.0,182000.0,0.0,0.0,6.0,7.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,200000.0,0.0,1699.92,0.0,30694.55,0.0,0.0,0.923984
2,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,E-Surat,,MANOJ PATEL,DENISH M. PATEL,ARINDAM CHAKRAVARTI,D,YOGESHBHAI H. ANJARA,UNIT MANAGER,E - SURAT,21,21,3626626.4,3560768.19,109994.52,1557645.832,1574721.26,57997.1,2715004.0,1202730.35,0.0,0.0,0.0,12.0,,,,,,,,,0.0,,,,,,,73.93,6.01,82.62,0.0,0,41.07,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0.0,111994.42,37.33,41.04,E-Surat,,MANOJ PATEL,DENISH M. PATEL,ARINDAM CHAKRAVARTI,D,YOGESHBHAI H. ANJARA,UNIT MANAGER,E - SURAT,33,33,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,-27.3,9.96,30.23,3.92,0,6.69,MUTUAL FUND-Active,2025,3191810.09,3141045.95,127993.62,-842921.918,-824241.271,17999.1,3543512.0,2156.71,0.0,0.0,1.0,12.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10595.47,0.0,-74064.056,0.0,0.0,1.128131
4,3,24695,CHINTU KUMAR SHAW,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1200000,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,27000,0.0,0.0,0.0,0.0,2023,3,24695,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1200000,0.0,0.0,,,,27000,0.0,0.0,16,3.0,18.75,0.0,0,,0.0,0.0,2.81,E-Surat,,MANOJ PATEL,DENISH M. PATEL,CHINTU KUMAR SHAW,D,YOGESHBHAI H. ANJARA,UNIT MANAGER,E - SURAT,15,15,31224.54,31224.54,8999.56,27998.67,27998.67,8999.56,0.0,0.0,0.0,0.0,1.0,2.0,,,,,,,,,0.0,,,,,,,1.88,,18.1,4.17,0,8.58,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [55]:
rowsss = MIS_total[MIS_total['Broker Code']=='34385']

In [61]:
rowsss = merged_with_hierarchy_mis[merged_with_hierarchy_mis['Broker Code']=='34385']

In [62]:
rowsss

Unnamed: 0,Sr No.,Partner Code,Partner Name_x,Center_x,Category_x,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year_x,Sr No,Broker Code,Doer Name_x,Doer Type_x,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement_x,ZM,SRM,RM,BM,Partner Name_y,Category_y,Doer Name_y,Doer Type_y,Center,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement_y,Status,FY_Year_y,Total AUM FY 24-25 Q4 YTD,Equity AUM FY 24-25 Q4 YTD,LIVE SIP FY 24-25 Q4 YTD,Total Net Sales\nFY 24-25 Q4 YTD,Equity Net Sales\nFY 24-25 Q4 YTD,Net SIP\nFY 24-25 Q4 YTD,MARS AUM FY 24-25 Q4 YTD,MARS Net Sales FY 24-25 Q4 YTD,PMS AUM FY 24-25 Q4 YTD,Net Sales FY 24-25 Q4 YTD,Clients Acquired FY 24-25 Q4 YTD,Live Accounts (Non-D) FY 24-25 Q4 YTD,Saturday School (YTD) FY 24-25 Q4 YTD,Investment\nSaturday School (YTD) FY 24-25 Q4 YTD,Insurance\nSaturday School (YTD) FY 24-25 Q4 YTD,Total Group FY 24-25 Q4 YTD,Group Covered FY 24-25 Q4 YTD,Total Reviews\nFY 24-25 Q4 YTD,Non-NJ AUM\nFY 24-25 Q4 YTD,Amount FY 24-25 Q4 YTD,Flexicap Target\nFY 24-25 Q4 YTD,Flexicap Ach\nFY 24-25 Q4 YTD,AMC NS Target\nFY 24-25 Q4 YTD,AMC NS Ach\nFY 24-25 Q4 YTD,Target Qty (FY 24-25 Q4 YTD)\n(in Cr),Order Qty\n(FY 24-25 Q4 YTD)\n(in Cr),MARS TO EQ AUM
41817,1210,34385,AADHYANSH FINANCE,AHMEDNAGAR,NON D,SACHIN N RANE,756000,629985,83.33,299985,0,0,0,0,0,0,0,0,0,330000,18900,0,0,0,0,2024,1210,34385,SACHIN N RANE,Fundz Express,AHMEDNAGAR,756000,629985,83.33,,,,18900,0,0,8,2,25.0,0.0,0,,0,0,27.22,SARFARAZ ABDULLA PATEL,,VIKRAMSINGH PARDESHI,Manish S. Pawar,AADHYANSH FINANCE,NON D,SACHIN N RANE,Fundz Express,AHMEDNAGAR,1,1,304984.06,304984.06,0,299985.02,299985.02,0,51102,150000,0,0,1,1,,,,,,,,,0,,,,,,,83.33,,0,25,,27.22,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,
86299,1454,34385,AADHYANSH FINANCE,AHMEDNAGAR,NON D,SACHIN N RANE,5000940,4895797,97.9,4059797,0,0,0,0,0,0,0,0,0,836000,123045,0,0,0,0,2025,1454,34385,SACHIN N RANE,Fundz Express,AHMEDNAGAR,5000940,4895797,97.9,,,,123045,0,0,51,2,3.92,2701.98,0,0.0,0,0,26.89,SARFARAZ ABDULLA PATEL,,VIKRAMSINGH PARDESHI,Manish S. Pawar,AADHYANSH FINANCE,NON D,SACHIN N RANE,Fundz Express,AHMEDNAGAR,1,1,304984.06,304984.06,0,299985.02,299985.02,0,51102,150000,0,0,1,1,,,,,,,,,0,,,,,,,83.33,,0,25,,27.22,MUTUAL FUND-Active,2024,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [59]:
merged_with_hierarchy_mis = merged_with_hierarchy_mis.drop_duplicates(
    subset=['Broker Code', 'FY_Year_x'],
    keep='first'
)

In [103]:
merged_with_hierarchy_mis = merged_with_hierarchy_mis.drop_duplicates(subset='Sr No.', keep='first')


In [120]:
merged_df['Broker Code'] = merged_df['Broker Code'].astype(str).str.strip()
MIS_total['Broker Code'] = MIS_total['Broker Code'].astype(str).str.strip()


In [64]:
merged_with_hierarchy_mis.columns

Index(['Sr No.', 'Partner Code', 'Partner Name_x', 'Center_x', 'Category_x',
       'Relationship Handler', 'Investment Net Sales Target',
       'Investment Net Sales Achievement',
       'Investment Net Sales % Achievement', 'Equity Sales',
       ...
       'Total Reviews\nFY 24-25 Q4 YTD', 'Non-NJ AUM\nFY 24-25 Q4 YTD',
       'Amount FY 24-25 Q4 YTD', 'Flexicap Target\nFY 24-25 Q4 YTD',
       'Flexicap Ach\nFY 24-25 Q4 YTD', 'AMC NS Target\nFY 24-25 Q4 YTD',
       'AMC NS Ach\nFY 24-25 Q4 YTD', 'Target Qty (FY 24-25 Q4 YTD)\n(in Cr)',
       'Order Qty\n(FY 24-25 Q4 YTD)\n(in Cr)', 'MARS TO EQ AUM'],
      dtype='object', length=122)

In [68]:
merged_with_hierarchy_mis.columns = (
    merged_with_hierarchy_mis.columns
    .str.replace('\n', ' ', regex=True)  # replace newline with space
    .str.replace('\r', '', regex=True)   # remove carriage returns (just in case)
    .str.strip()                         # remove leading/trailing spaces
)

### ZM - Partner Concentration

In [109]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ------------------ GEMINI SETUP ------------------
genai.configure(api_key=google_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

# ------------------ CLEAN COLUMN NAMES ------------------
def clean_columns(df):
    df.columns = df.columns.str.strip().str.replace("\n", " ").str.replace("\r", " ")
    return df

# ------------------ ADD KPI COLUMNS ------------------
def add_ratio_kpis(df):
    def safe_col(name):
        return df[name] if name in df.columns else pd.Series(0, index=df.index)

    df["Total AUM Combined"] = (
        safe_col("Total AUM FY 23-24 Q4 YTD").fillna(0)
        + safe_col("Total AUM FY 24-25 Q4 YTD").fillna(0)
    )
    total_aum = df["Total AUM Combined"].replace(0, np.nan)

    df["Equity Sales as % of Total AUM"] = np.divide(
        (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")) * 100, total_aum
    )

    df["Live SIP As % of Total AUM"] = np.divide(
        (safe_col("LIVE SIP FY 23-24 Q4 YTD") + safe_col("LIVE SIP FY 24-25 Q4 YTD")) * 100, total_aum
    )

    df["PMS AUM as % of Total AUM"] = np.divide(
        (safe_col("PMS AUM FY 23-24 YTD") + safe_col("PMS AUM FY 24-25 YTD")) * 100, total_aum
    )

    df["SIP Closure to Net SIP %"] = np.divide(
        (safe_col("SIP Closure FY 23-24 Q4 YTD") + safe_col("SIP Closure FY 24-25 Q4 YTD")) * 100,
        (safe_col("Net SIP FY 23-24 YTD") + safe_col("Net SIP FY 24-25 YTD")).replace(0, np.nan),
    )

    df["MARS AUM as % Total AUM"] = np.divide(
        (safe_col("MARS AUM FY 23-24 YTD") + safe_col("MARS AUM FY 24-25 YTD")) * 100, total_aum
    )

    df["MARS Net Sales as % Equity Sales"] = np.divide(
        (safe_col("MARS Net Sales FY 23-24 YTD") + safe_col("MARS Net Sales FY 24-25 YTD")) * 100,
        (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")).replace(0, np.nan),
    )

    return df

# ------------------ PARTNER CONCENTRATION CALCULATOR ------------------
def partner_concentration_hierarchy(df, kpi_col):
    records = []

    for zm, zm_group in df.groupby("ZM"):
        for bm, bm_group in zm_group.groupby("BM"):
            for rh, rh_group in bm_group.groupby("Relationship Handler"):
                if kpi_col not in rh_group.columns:
                    continue

                partner_sum = (
                    rh_group.groupby("Partner Name_x")[kpi_col]
                    .sum()
                    .sort_values(ascending=False)
                )

                total = partner_sum.sum()
                if total == 0 or np.isnan(total):
                    continue

                cumsum = partner_sum.cumsum() / total * 100
                partners_25 = (cumsum <= 25).sum()
                partners_50 = (cumsum <= 50).sum()
                partners_80 = (cumsum <= 80).sum()

                top_partner = partner_sum.index[0]
                top_partner_share = partner_sum.iloc[0] / total * 100

                records.append({
                    "ZM": zm,
                    "BM": bm,
                    "Relationship Handler": rh,
                    "KPI": kpi_col,
                    "Total Partners": len(partner_sum),
                    "Partners for 25%": partners_25,
                    "Partners for 50%": partners_50,
                    "Partners for 80%": partners_80,
                    "Top Partner": top_partner,
                    "Top Partner Share %": round(top_partner_share, 2),
                    "Total Value": total
                })

    return pd.DataFrame(records)

# ------------------ INSIGHT GENERATOR ------------------
class DeepInsightGenerator:
    def __init__(self, model):
        self.model = model

    def generate_zm_insights(self, df_summary):
        insights = {}

        for zm in df_summary["ZM"].unique():
            zm_df = df_summary[df_summary["ZM"] == zm]

            prompt_text = ""
            for kpi in zm_df["KPI"].unique():
                kpi_df = zm_df[zm_df["KPI"] == kpi]

                # Find top 2 BMs by total KPI value
                top_bms = (
                    kpi_df.groupby("BM")["Total Value"].sum().sort_values(ascending=False).head(2).index.tolist()
                )

                prompt_text += f"\n=== KPI: {kpi} ===\n"
                for bm in top_bms:
                    bm_df = kpi_df[kpi_df["BM"] == bm]
                    prompt_text += f"\nBranch Manager: {bm}\n"

                    for rh in bm_df["Relationship Handler"].unique():
                        rh_df = bm_df[bm_df["Relationship Handler"] == rh]
                        for _, row in rh_df.iterrows():
                            prompt_text += (
                                f"  RH: {row['Relationship Handler']} | "
                                f"Top Partner: {row['Top Partner']} ({row['Top Partner Share %']}%) | "
                                f"Partners covering 25%: {row['Partners for 25%']}, "
                                f"50%: {row['Partners for 50%']}, "
                                f"80%: {row['Partners for 80%']}\n"
                            )

            prompt = f"""
You are a financial performance analyst. 
Below is data for Zonal Manager '{zm}' showing partner contribution across Branch Managers (BMs) and Relationship Heads (RHs).

Analyze partner concentration and business dependency. For each KPI, provide 3–4 concise insights (~300–400 words total) covering:

1. Overall concentration level (high/medium/low) across the zone.  
2. BMs or RHs heavily dependent on few partners (risk areas).  
3. Key quantified highlights (e.g., top X% partners contributing Y% of KPI).  
4. Strategic recommendations for diversification, partner development, and reducing dependency risk.


Data Summary:
{prompt_text}
            """

            try:
                response = self.model.generate_content(prompt)
                insights[zm] = response.text.strip()
            except Exception as e:
                insights[zm] = f"Error generating insight: {e}"

        return insights

# ------------------ RUN PIPELINE ------------------
merged_with_hierarchy_mis = clean_columns(merged_with_hierarchy_mis)
merged_with_hierarchy_mis = add_ratio_kpis(merged_with_hierarchy_mis)

kpi_list = [
    "Equity Sales as % of Total AUM",
    "Live SIP As % of Total AUM",
    "PMS AUM as % of Total AUM",
    "SIP Closure to Net SIP %",
    "MARS AUM as % Total AUM",
    "MARS Net Sales as % Equity Sales",
]

summary_df = pd.concat([
    partner_concentration_hierarchy(merged_with_hierarchy_mis, kpi) for kpi in kpi_list
])

insight_gen = DeepInsightGenerator(model)
hierarchy_insights = insight_gen.generate_zm_insights(summary_df)

# ------------------ DISPLAY ------------------
for zm, text in hierarchy_insights.items():
    print(f"\n\n########## ZONAL MANAGER: {zm} ##########\n{text}\n")


  safe_col("Total AUM FY 23-24 Q4 YTD").fillna(0)
  + safe_col("Total AUM FY 24-25 Q4 YTD").fillna(0)
  (safe_col("Net SIP FY 23-24 YTD") + safe_col("Net SIP FY 24-25 YTD")).replace(0, np.nan),
  (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")).replace(0, np.nan),




########## ZONAL MANAGER: BHARAT KACHHADIA ##########
Okay, let's analyze the partner concentration and business dependency for Zonal Manager Bharat Kachhadia, based on the provided data.

**Overall Concentration Level:**

Across all KPIs, the overall concentration level is **HIGH**, particularly in PMS AUM and MARS AUM, but also noticeably in Equity Sales and MARS Net Sales. A significant number of RHs are heavily reliant on a single or a small handful of partners for a large proportion of their AUM and/or Sales within each KPI. This creates a high risk profile for the Zone.

**BMs/RHs Heavily Dependent on Few Partners (Risk Areas):**

Several BMs and RHs exhibit extreme dependency on a limited number of partners, signaling potential vulnerabilities:

*   **Equity Sales:** N Chakradhar (BM BODAIAHGARI CHENNA KISHOR REDDY) is overwhelmingly dependent on JANAKI RAMA RAJU PENMATSA (89.65% of Equity Sales). TRIPATINATH RAM NARESH ROY (BM MAHESH BABU VEMALA) generates 100% equity sales f

In [202]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ------------------ GEMINI SETUP ------------------
genai.configure(api_key=google_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

# ------------------ CLEAN COLUMN NAMES ------------------
def clean_columns(df):
    df.columns = df.columns.str.strip().str.replace("\n", " ").str.replace("\r", " ")
    return df

# ------------------ ADD KPI COLUMNS ------------------
def add_ratio_kpis(df):
    def safe_col(name):
        return df[name] if name in df.columns else pd.Series(0, index=df.index)

    df["Total AUM Combined"] = (
        safe_col("Total AUM FY 23-24 Q4 YTD").fillna(0)
        + safe_col("Total AUM FY 24-25 Q4 YTD").fillna(0)
    )
    total_aum = df["Total AUM Combined"].replace(0, np.nan)

    df["Equity Sales as % of Total AUM"] = np.divide(
        (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")) * 100, total_aum
    )

    df["Live SIP As % of Total AUM"] = np.divide(
        (safe_col("LIVE SIP FY 23-24 Q4 YTD") + safe_col("LIVE SIP FY 24-25 Q4 YTD")) * 100, total_aum
    )

    df["PMS AUM as % of Total AUM"] = np.divide(
        (safe_col("PMS AUM FY 23-24 YTD") + safe_col("PMS AUM FY 24-25 YTD")) * 100, total_aum
    )

    df["SIP Closure to Net SIP %"] = np.divide(
        (safe_col("SIP Closure FY 23-24 Q4 YTD") + safe_col("SIP Closure FY 24-25 Q4 YTD")) * 100,
        (safe_col("Net SIP FY 23-24 YTD") + safe_col("Net SIP FY 24-25 YTD")).replace(0, np.nan),
    )

    df["MARS AUM as % Total AUM"] = np.divide(
        (safe_col("MARS AUM FY 23-24 YTD") + safe_col("MARS AUM FY 24-25 YTD")) * 100, total_aum
    )

    df["MARS Net Sales as % Equity Sales"] = np.divide(
        (safe_col("MARS Net Sales FY 23-24 YTD") + safe_col("MARS Net Sales FY 24-25 YTD")) * 100,
        (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")).replace(0, np.nan),
    )

    return df

# ------------------ PARTNER CONCENTRATION CALCULATOR ------------------
def partner_concentration_hierarchy(df, kpi_col):
    records = []

    for zm, zm_group in df.groupby("ZM"):
        for bm, bm_group in zm_group.groupby("BM"):
            for rh, rh_group in bm_group.groupby("Relationship Handler"):
                if kpi_col not in rh_group.columns:
                    continue

                partner_sum = (
                    rh_group.groupby("Partner Name_x")[kpi_col]
                    .sum()
                    .sort_values(ascending=False)
                )

                total = partner_sum.sum()
                if total == 0 or np.isnan(total):
                    continue

                cumsum = partner_sum.cumsum() / total * 100
                partners_25 = (cumsum <= 25).sum()
                partners_50 = (cumsum <= 50).sum()
                partners_80 = (cumsum <= 80).sum()

                top_partner = partner_sum.index[0]
                top_partner_share = partner_sum.iloc[0] / total * 100

                records.append({
                    "ZM": zm,
                    "BM": bm,
                    "Relationship Handler": rh,
                    "KPI": kpi_col,
                    "Total Partners": len(partner_sum),
                    "Partners for 25%": partners_25,
                    "Partners for 50%": partners_50,
                    "Partners for 80%": partners_80,
                    "Top Partner": top_partner,
                    "Top Partner Share %": round(top_partner_share, 2),
                    "Total Value": total
                })

    return pd.DataFrame(records)

# ------------------ INSIGHT GENERATOR ------------------
class DeepInsightGenerator:
    def __init__(self, model):
        self.model = model

    def generate_zm_insights(self, df_summary):
        insights = {}

        for zm in df_summary["ZM"].unique():
            zm_df = df_summary[df_summary["ZM"] == zm]

            prompt_text = ""
            for kpi in zm_df["KPI"].unique():
                kpi_df = zm_df[zm_df["KPI"] == kpi]

                # Find top 2 BMs by total KPI value
                top_bms = (
                    kpi_df.groupby("BM")["Total Value"].sum().sort_values(ascending=False).head(2).index.tolist()
                )

                prompt_text += f"\n=== KPI: {kpi} ===\n"
                for bm in top_bms:
                    bm_df = kpi_df[kpi_df["BM"] == bm]
                    prompt_text += f"\nBranch Manager: {bm}\n"

                    for rh in bm_df["Relationship Handler"].unique():
                        rh_df = bm_df[bm_df["Relationship Handler"] == rh]
                        for _, row in rh_df.iterrows():
                            prompt_text += (
                                f"  RH: {row['Relationship Handler']} | "
                                f"Top Partner: {row['Top Partner']} ({row['Top Partner Share %']}%) | "
                                f"Partners covering 25%: {row['Partners for 25%']}, "
                                f"50%: {row['Partners for 50%']}, "
                                f"80%: {row['Partners for 80%']}\n"
                            )

            prompt = f"""
You are a financial performance analyst. 
Below is data for Zonal Manager '{zm}' showing partner contribution across Branch Managers (BMs) and Relationship Heads (RHs).

Analyze partner concentration and business dependency. For each KPI, provide 3–4 concise insights (~300–400 words total) covering:

1. Overall concentration level (high/medium/low) across the zone.  
2. BMs or RHs heavily dependent on few partners (risk areas).  
3. Key quantified highlights (e.g., top X% partners contributing Y% of KPI).  
4. Strategic recommendations for diversification, partner development, and reducing dependency risk.


Data Summary:
{prompt_text}
            """

            try:
                response = self.model.generate_content(prompt)
                insights[zm] = response.text.strip()
            except Exception as e:
                insights[zm] = f"Error generating insight: {e}"

        return insights

# ------------------ RUN PIPELINE ------------------
merged_with_hierarchy_mis = clean_columns(merged_with_hierarchy_mis)
merged_with_hierarchy_mis = add_ratio_kpis(merged_with_hierarchy_mis)

kpi_list = [
    "Equity Sales as % of Total AUM",
    "Live SIP As % of Total AUM",
    "PMS AUM as % of Total AUM",
    "SIP Closure to Net SIP %",
    "MARS AUM as % Total AUM",
    "MARS Net Sales as % Equity Sales",
]

summary_df = pd.concat([
    partner_concentration_hierarchy(merged_with_hierarchy_mis, kpi) for kpi in kpi_list
])

insight_gen = DeepInsightGenerator(model)
hierarchy_insights = insight_gen.generate_zm_insights(summary_df)

# ------------------ DISPLAY ------------------
for zm, text in hierarchy_insights.items():
    print(f"\n\n########## ZONAL MANAGER: {zm} ##########\n{text}\n")


  safe_col("Total AUM FY 23-24 Q4 YTD").fillna(0)
  + safe_col("Total AUM FY 24-25 Q4 YTD").fillna(0)
  (safe_col("Net SIP FY 23-24 YTD") + safe_col("Net SIP FY 24-25 YTD")).replace(0, np.nan),
  (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")).replace(0, np.nan),




########## ZONAL MANAGER: BHARAT KACHHADIA ##########
Okay, let's analyze the partner concentration and business dependency for Zonal Manager Bharat Kachhadia, based on the provided data.

**Overall Concentration Level:**

Overall, the concentration level is **high** across the zone, indicating a significant dependency on a limited number of partners for each KPI, particularly for PMS AUM and MARS AUM. Equity Sales shows some diversification, but concerningly some RHs rely very heavily on a single partner. MARS Net Sales as % Equity Sales is also highly concentrated.

**Risk Areas and Partner Dependency:**

Several BMs and RHs exhibit extremely high dependency on a few key partners, creating significant risk. This is observed across all KPIs:

*   **Equity Sales:** N Chakradhar (BM: BODAIAHGARI CHENNA KISHOR REDDY) depends heavily on a single partner, Janaki Rama Raju Penmatsa (89.65%). TRIPATINATH RAM NARESH ROY (BM: MAHESH BABU VEMALA) is entirely reliant on themselves.
*   **PMS A

In [203]:
import pandas as pd
from sqlalchemy import create_engine

# ------------------ MYSQL CONNECTION ------------------
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# ------------------ SAVE INSIGHTS TO MYSQL ------------------
def save_hierarchy_insights_to_mysql(insight_dict, summary_df, insight_type="Partner_Concentration"):
    """
    Saves zonal manager hierarchy insights (4 columns: ZM, BM, Insight_Type, Insight) into MySQL.
    BM is mapped from summary_df for each ZM.
    """
    records = []

    for zm, insight_text in insight_dict.items():
        # Get unique BMs under this ZM from summary_df
        bm_list = summary_df.loc[summary_df["ZM"] == zm, "BM"].unique().tolist()

        if not bm_list:  # In case of missing BM mapping
            records.append({
                "ZM": zm,
                "BM": "Unknown BM",
                "Insight_Type": insight_type,
                "Insight": insight_text
            })
        else:
            for bm in bm_list:
                records.append({
                    "ZM": zm,
                    "BM": bm,
                    "Insight_Type": insight_type,
                    "Insight": insight_text
                })

    df_to_save = pd.DataFrame(records)

    if df_to_save.empty:
        print(" No insights to save — DataFrame is empty.")
        return

    try:
        df_to_save.to_sql(
            name="AllInsights",  # You can change this to another table name
            con=engine,
            if_exists="append",
            index=False
        )
        print(f" Saved {len(df_to_save)} hierarchy insights to MySQL successfully.")
    except Exception as e:
        print(f" Error saving to MySQL: {e}")


In [209]:
save_hierarchy_insights_to_mysql(hierarchy_insights, summary_df)


 Saved 133 hierarchy insights to MySQL successfully.


  df_to_save.to_sql(


### ZM - Leaders & laggers

In [125]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ---------------- GEMINI SETUP ----------------
genai.configure(api_key=gemini_api_key)

class DeepInsightGenerator:
    """Uses Gemini LLM to generate descriptive & prescriptive insights dynamically."""
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str) -> str:
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            return f"Error generating insight: {e}"


# ---------------- ADD RATIO KPIS ----------------
def add_ratio_kpis(df):
    def safe_col(name):
        return df[name] if name in df.columns else pd.Series(0, index=df.index)

    df["Total AUM Combined"] = (
        safe_col("Total AUM FY 23-24 Q4 YTD").fillna(0)
        + safe_col("Total AUM FY 24-25 Q4 YTD").fillna(0)
    )
    total_aum = df["Total AUM Combined"].replace(0, np.nan)

    df["Equity Sales as % of Total AUM"] = np.divide(
        (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")) * 100, total_aum
    )

    df["Live SIP As % of Total AUM"] = np.divide(
        (safe_col("LIVE SIP FY 23-24 Q4 YTD") + safe_col("LIVE SIP FY 24-25 Q4 YTD")) * 100, total_aum
    )

    df["PMS AUM as % of Total AUM"] = np.divide(
        (safe_col("PMS AUM FY 23-24 YTD") + safe_col("PMS AUM FY 24-25 YTD")) * 100, total_aum
    )

    df["SIP Closure to Net SIP %"] = np.divide(
        (safe_col("SIP Closure FY 23-24 Q4 YTD") + safe_col("SIP Closure FY 24-25 Q4 YTD")) * 100,
        (safe_col("Net SIP FY 23-24 YTD") + safe_col("Net SIP FY 24-25 YTD")).replace(0, np.nan),
    )

    df["MARS AUM as % Total AUM"] = np.divide(
        (safe_col("MARS AUM FY 23-24 YTD") + safe_col("MARS AUM FY 24-25 YTD")) * 100, total_aum
    )

    df["MARS Net Sales as % Equity Sales"] = np.divide(
        (safe_col("MARS Net Sales FY 23-24 YTD") + safe_col("MARS Net Sales FY 24-25 YTD")) * 100,
        (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")).replace(0, np.nan),
    )

    return df


# ---------------- ZM-LEVEL LEADERS & LAGGERS ----------------
def zm_leaders_laggers(df, kpi, top_bms=2, top_partners=3):
    results = {}

    for zm, zm_group in df.groupby("ZM"):
        if kpi not in zm_group.columns:
            continue

        # Aggregate KPI by BM
        bm_perf = zm_group.groupby("BM")[kpi].sum().sort_values(ascending=False)
        top_bms_list = bm_perf.head(top_bms).index.tolist()
        bottom_bms_list = bm_perf.tail(top_bms).index.tolist()

        summary_text = f"Zonal Manager: {zm}\nKPI: {kpi}\n\n"

        for bm_name in top_bms_list + bottom_bms_list:
            bm_group = zm_group[zm_group["BM"] == bm_name]
            partner_perf = bm_group.groupby("Partner Name_x")[kpi].sum().sort_values(ascending=False)
            total_perf = partner_perf.sum()

            if total_perf == 0 or np.isnan(total_perf):
                partner_share = pd.Series(0, index=partner_perf.index)
            else:
                partner_share = round(partner_perf / total_perf * 100, 2)

            leaders = partner_perf.head(top_partners)
            laggers = partner_perf.tail(top_partners)

            summary_text += f"Branch Manager: {bm_name} | Total Partners: {len(partner_perf)}\n"
            summary_text += f"Top {top_partners} Leaders contribute: {partner_share.loc[leaders.index].sum():.2f}%\n"
            summary_text += f"Bottom {top_partners} Laggers contribute: {partner_share.loc[laggers.index].sum():.2f}%\n\n"

            summary_text += "Top Performing (Leader) Partners:\n"
            for p in leaders.index:
                summary_text += f" - {p}: {partner_perf[p]:,.2f} ({partner_share[p]}%)\n"

            summary_text += "Low Performing (Lagger) Partners:\n"
            for p in laggers.index:
                summary_text += f" - {p}: {partner_perf[p]:,.2f} ({partner_share[p]}%)\n"

            summary_text += "\n"

        results[zm] = summary_text

    return results

# ---------------- GEMINI INSIGHT GENERATION ----------------
def generate_zm_insights(df, kpi_list, generator, top_n=2):
    all_zm_insights = {}

    for kpi in kpi_list:
        zm_summaries = zm_leaders_laggers(df, kpi, top_n)

        for zm, text in zm_summaries.items():
            prompt = f"""
            You are a financial performance analyst.
            Below is data for Zonal Manager '{zm}' on KPI '{kpi}' showing top and bottom performing partners.

            Write a professional 3–4 bullet point insight covering:
            1. Performance dependency (e.g., dominated by few top partners or evenly spread),
            2. Impact of leaders on total performance,
            3. Weak link from laggers and improvement recommendations,
            4. Any early warning or strategic focus points.

            Data:
            {text}
            """
            insight = generator.generate_insight(prompt)
            all_zm_insights.setdefault(zm, {})[kpi] = text + "\n\nLLM Insights:\n" + insight

    return all_zm_insights


# ---------------- EXECUTION ----------------
# Clean and add ratio KPIs
merged_with_hierarchyy = add_ratio_kpis(merged_with_hierarchy_mis)

# Define KPI list (all ratio KPIs)
kpi_list = [
    "Equity Sales as % of Total AUM",
    "Live SIP As % of Total AUM",
    "PMS AUM as % of Total AUM",
    "SIP Closure to Net SIP %",
    "MARS AUM as % Total AUM",
    "MARS Net Sales as % Equity Sales"
]

# Initialize generator
generator = DeepInsightGenerator()

# Generate ZM-level insights
zm_insights = generate_zm_insights(merged_with_hierarchy_mis, kpi_list, generator, top_n=2)


# ---------------- DISPLAY ----------------
for zm, kpis in zm_insights.items():
    print(f"\n######## ZONAL MANAGER: {zm} ########")
    for kpi, insight in kpis.items():
        print(f"\n=== {kpi} ===")
        print(insight)
        print("-" * 80)


  safe_col("Total AUM FY 23-24 Q4 YTD").fillna(0)
  + safe_col("Total AUM FY 24-25 Q4 YTD").fillna(0)
  (safe_col("Net SIP FY 23-24 YTD") + safe_col("Net SIP FY 24-25 YTD")).replace(0, np.nan),
  (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")).replace(0, np.nan),



######## ZONAL MANAGER: BHARAT KACHHADIA ########

=== Equity Sales as % of Total AUM ===
Zonal Manager: BHARAT KACHHADIA
KPI: Equity Sales as % of Total AUM

Branch Manager: BODAIAHGARI CHENNA KISHOR REDDY | Total Partners: 241
Top 3 Leaders contribute: 5.81%
Bottom 3 Laggers contribute: -2.44%

Top Performing (Leader) Partners:
 - RAVIKUMAR DAGGUMATI: 286.26 (1.9742211074132636%)
 - PRAVEEN PITTA: 285.66 (1.9700719844522028%)
 - SAINATH PANJAGARI: 271.22 (1.8704542182118231%)
Low Performing (Lagger) Partners:
 - BODA CHANDRA SHEKAR: -90.01 (-0.6207230282308914%)
 - N V Subba Raju: -98.58 (-0.6798244000648956%)
 - KALI VARA PRASAD NARAYANASETTI: -164.92 (-1.137376336847485%)

Branch Manager: MAHESH BABU VEMALA | Total Partners: 279
Top 3 Leaders contribute: 5.74%
Bottom 3 Laggers contribute: -1.27%

Top Performing (Leader) Partners:
 - NAGARAJU GOUD PALLELA: 289.95 (2.04326588343635%)
 - VENKATESWARLU GUNDU: 270.12 (1.9034655454536082%)
 - MOHAMMED NIRANJAN: 253.92 (1.789364545285062

In [210]:
from sqlalchemy import create_engine
import pandas as pd

# ---------------- MYSQL CONNECTION ----------------
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# ---------------- SAVE ZM INSIGHTS ----------------
def save_zm_insights_to_mysql(zm_insights, hierarchy_df, insight_type="Leaders Laggers"):
    """
    Saves ZM-level insights into MySQL with columns:
    ZM | BM | Insight_Type | Insight
    """

    records = []

    for zm, kpi_dict in zm_insights.items():
        # Get all BMs under this ZM from hierarchy
        bm_list = hierarchy_df.loc[hierarchy_df["ZM"] == zm, "BM"].unique().tolist()

        for kpi_name, insight_text in kpi_dict.items():
            for bm in (bm_list if bm_list else ["Unknown BM"]):
                records.append({
                    "ZM": zm,
                    "BM": bm,
                    "Insight_Type": insight_type,
                    "Insight": f"{kpi_name}\n\n{insight_text}"
                })

    df_to_save = pd.DataFrame(records, columns=["ZM", "BM", "Insight_Type", "Insight"])

    if df_to_save.empty:
        print(" No insights to save — DataFrame is empty.")
        return

    # Reorder columns to ensure ZM is first
    df_to_save = df_to_save[["ZM", "BM", "Insight_Type", "Insight"]]

    try:
        df_to_save.to_sql(
            name="AllInsights",  
            con=engine,
            if_exists="append", 
            index=False
        )
        print(f" Saved {len(df_to_save)} ZM insights to MySQL successfully.")
    except Exception as e:
        print(f" Error saving to MySQL: {e}")


# ---------------- EXECUTE SAVE ----------------
save_zm_insights_to_mysql(
    zm_insights,
    merged_with_hierarchy_mis,  # your full dataset with ZM-BM mapping
    insight_type="Leaders Laggers"
)


 Saved 918 ZM insights to MySQL successfully.


  df_to_save.to_sql(


#### For that ZM - Partners changing course / drastic change in performance.

In [211]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
gemini_api_key = google_api_key
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

# ================= CONFIG =================
zm_col = "ZM"
bm_col = "BM"
partner_col = "Partner Name_x"
top_zm = 2
top_bm = 2
top_partners = 2
threshold_pct_change = 50  # drastic change threshold %

# KPI columns (combined ratios)
kpi_cols = [
    "Equity Sales as % of Total AUM",
    "Live SIP As % of Total AUM",
    "PMS AUM as % of Total AUM",
    "SIP Closure to Net SIP %",
    "MARS AUM as % Total AUM",
    "MARS Net Sales as % Equity Sales"
]

# ================= ADD RATIO KPIs =================
def add_ratio_kpis(df):
    def safe_col(name):
        return df[name] if name in df.columns else pd.Series(0, index=df.index)

    df["Total AUM Combined"] = (
        safe_col("Total AUM FY 23-24 Q4 YTD").fillna(0)
        + safe_col("Total AUM FY 24-25 Q4 YTD").fillna(0)
    )
    total_aum = df["Total AUM Combined"].replace(0, np.nan)

    df["Equity Sales as % of Total AUM"] = np.divide(
        (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")) * 100, total_aum
    )

    df["Live SIP As % of Total AUM"] = np.divide(
        (safe_col("LIVE SIP FY 23-24 Q4 YTD") + safe_col("LIVE SIP FY 24-25 Q4 YTD")) * 100, total_aum
    )

    df["PMS AUM as % of Total AUM"] = np.divide(
        (safe_col("PMS AUM FY 23-24 YTD") + safe_col("PMS AUM FY 24-25 YTD")) * 100, total_aum
    )

    df["SIP Closure to Net SIP %"] = np.divide(
        (safe_col("SIP Closure FY 23-24 Q4 YTD") + safe_col("SIP Closure FY 24-25 Q4 YTD")) * 100,
        (safe_col("Net SIP FY 23-24 YTD") + safe_col("Net SIP FY 24-25 YTD")).replace(0, np.nan),
    )

    df["MARS AUM as % Total AUM"] = np.divide(
        (safe_col("MARS AUM FY 23-24 YTD") + safe_col("MARS AUM FY 24-25 YTD")) * 100, total_aum
    )

    df["MARS Net Sales as % Equity Sales"] = np.divide(
        (safe_col("MARS Net Sales FY 23-24 YTD") + safe_col("MARS Net Sales FY 24-25 YTD")) * 100,
        (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")).replace(0, np.nan),
    )

    return df

# ================= DETECT DRASTIC CHANGES =================
def detect_drastic_changes(df):
    records = []
    for (zm, bm, partner), group in df.groupby([zm_col, bm_col, partner_col]):
        for kpi in kpi_cols:
            prev_val = group[kpi].iloc[0] if len(group) > 0 else np.nan
            curr_val = group[kpi].iloc[-1] if len(group) > 0 else np.nan
            if pd.isna(prev_val) or pd.isna(curr_val) or prev_val == 0:
                continue
            pct_change = ((curr_val - prev_val) / abs(prev_val)) * 100
            if abs(pct_change) >= threshold_pct_change:
                records.append({
                    zm_col: zm,
                    bm_col: bm,
                    partner_col: partner,
                    "KPI": kpi,
                    "Previous Value": round(prev_val, 2),
                    "Current Value": round(curr_val, 2),
                    "% Change": round(pct_change, 2)
                })
    return pd.DataFrame(records)

# ================= FILTER TOP HIERARCHY =================
def get_top_hierarchy(change_df):
    top_zms = change_df.groupby(zm_col)["% Change"].apply(lambda x: x.abs().mean()).sort_values(ascending=False).head(top_zm).index
    final_records = []
    for zm in top_zms:
        zm_data = change_df[change_df[zm_col]==zm]
        top_bms = zm_data.groupby(bm_col)["% Change"].apply(lambda x: x.abs().mean()).sort_values(ascending=False).head(top_bm).index
        for bm in top_bms:
            bm_data = zm_data[zm_data[bm_col]==bm]
            top_partners_names = bm_data.groupby(partner_col)["% Change"].apply(lambda x: x.abs().mean()).sort_values(ascending=False).head(top_partners).index
            final_records.append(bm_data[bm_data[partner_col].isin(top_partners_names)])
    return pd.concat(final_records)

# ================= GENERATE INSIGHTS =================
def generate_zm_drastic_insights(change_df):
    zm_insights = {}
    for zm, zm_group in change_df.groupby(zm_col):
        zm_insights[zm] = {}
        for bm, bm_group in zm_group.groupby(bm_col):
            zm_insights[zm][bm] = {}
            for _, row in bm_group.iterrows():
                partner = row[partner_col]
                kpi = row["KPI"]
                prompt = f"""
You are a senior business analyst.
Analyze KPI '{kpi}' for partner '{partner}' under BM '{bm}' in ZM '{zm}'.

Partner is showing drastic change in performance (> {threshold_pct_change}% change):
Previous Value: {row['Previous Value']}
Current Value: {row['Current Value']}
% Change: {row['% Change']}

Your task:
- Identify reasons or insights for this drastic change.
- Suggest 1-2 quantitative recommendations for improvement.
- Explain impact on BM/ZM portfolio.

Output in concise bullet points.
"""
                try:
                    resp = model.generate_content(
                        prompt,
                        generation_config={"temperature":0.6, "top_p":0.9, "max_output_tokens":800}
                    )
                    zm_insights[zm][bm][partner] = resp.text.strip()
                except Exception as e:
                    zm_insights[zm][bm][partner] = f"Error generating insight: {e}"
    return zm_insights

# ================= EXECUTION =================
merged_with_hierarchyy = add_ratio_kpis(merged_with_hierarchy_mis)
change_df = detect_drastic_changes(merged_with_hierarchyy)
filtered_df = get_top_hierarchy(change_df)
zm_drastic_insights = generate_zm_drastic_insights(filtered_df)

# ================= DISPLAY =================
print("\n================= TOP ZM → BM → PARTNERS (DRASTIC CHANGE) =================\n")
print(filtered_df.head(10))

print("\n================= ZM DRASTIC CHANGE INSIGHTS =================\n")
for zm, bms in zm_drastic_insights.items():
    print(f"\n######## Zonal Manager: {zm} ########\n")
    for bm, partners in bms.items():
        print(f"--- Branch Manager: {bm} ---\n")
        for partner, insight in partners.items():
            print(f"Partner: {partner}\n{insight}\n")
        print("-"*100)
    print("="*120)


  safe_col("Total AUM FY 23-24 Q4 YTD").fillna(0)
  + safe_col("Total AUM FY 24-25 Q4 YTD").fillna(0)
  (safe_col("Net SIP FY 23-24 YTD") + safe_col("Net SIP FY 24-25 YTD")).replace(0, np.nan),
  (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")).replace(0, np.nan),




                          ZM              BM  Partner Name_x  \
13  PRASHANT ANANTRAI KAKKAD      AYUSH JAIN     VIKAS GUPTA   
20  PRASHANT ANANTRAI KAKKAD  MD NIYAZ AHMAD    Dinesh Singh   
21  PRASHANT ANANTRAI KAKKAD  MD NIYAZ AHMAD    Dinesh Singh   
22  PRASHANT ANANTRAI KAKKAD  MD NIYAZ AHMAD     VIDHI GUPTA   
0              KOUSHIK GHOSH  DEBASHIS GHOSH  DIPANKAR GHOSH   
1              KOUSHIK GHOSH  DEBASHIS GHOSH  DIPANKAR GHOSH   
2              KOUSHIK GHOSH  DEBASHIS GHOSH  DIPANKAR GHOSH   
3              KOUSHIK GHOSH   PREETAM KUMAR      AMIT KUMAR   

                                 KPI  Previous Value  Current Value  % Change  
13    Equity Sales as % of Total AUM           28.02          90.99    224.69  
20    Equity Sales as % of Total AUM            7.52          15.22    102.53  
21           MARS AUM as % Total AUM           13.02          44.20    239.52  
22    Equity Sales as % of Total AUM           28.70          94.35    228.80  
0     Equity Sales as

In [217]:
from sqlalchemy import create_engine, text
import pandas as pd

# ---------------- MYSQL CONNECTION ----------------
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# ---------------- SAVE PARTNER-LEVEL INSIGHTS ----------------
def save_drastic_insights_to_mysql(zm_drastic_insights, insight_type="Drastic_Change_Analysis"):
    """
    Stores ZM → BM → Partner-level drastic change insights into MySQL.
    Each record will include ZM, BM, Partner_Name, Insight_Type, Insight.
    """
    records = []

    # Flatten nested dict structure
    for zm, bms in zm_drastic_insights.items():
        for bm, partners in bms.items():
            for partner, insight_text in partners.items():
                records.append({
                    "ZM": zm,
                    "BM": bm,
                    "Partner_Name": partner,
                    "Insight_Type": insight_type,
                    "Insight": insight_text
                })

    # Convert to DataFrame
    df_to_save = pd.DataFrame(records)

    if df_to_save.empty:
        print("⚠️ No insights to save — DataFrame is empty.")
        return

    # Ensure correct column order
    expected_cols = ["ZM", "BM", "Partner_Name", "Insight_Type", "Insight"]
    df_to_save = df_to_save[expected_cols]

    try:
        # Test if table exists (case-sensitive)
        with engine.connect() as conn:
            tables = conn.execute(text("SHOW TABLES")).fetchall()
            existing_tables = [t[0] for t in tables]
            print(f"📋 Existing tables in DB: {existing_tables}")

        # Save insights
        df_to_save.to_sql(
            name="AllInsights",      # consistent casing
            con=engine,
            if_exists="append",      # append instead of overwrite
            index=False
        )

        print(f"✅ {len(df_to_save)} partner-level insights saved successfully into 'AllInsights'.")

    except Exception as e:
        print(f"❌ Error saving insights to MySQL: {e}")


# ---------------- EXECUTE SAVE ----------------
save_drastic_insights_to_mysql(zm_drastic_insights)


📋 Existing tables in DB: ['allinsights', 'insights_data', 'partnerfocusinsights']
✅ 5 partner-level insights saved successfully into 'AllInsights'.


  df_to_save.to_sql(


###  For that ZM - Partners and areas to focus

In [144]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
gemini_api_key = google_api_key
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

# ================= CONFIG =================
zm_col = "ZM"
bm_col = "BM"
partner_col = "Partner Name_x"
top_zm = 2
top_bm = 2
top_partners = 2
target_col_suffix = 'Target'

# KPI columns for focus (combined %)
kpi_cols = [
    "Equity Sales as % of Total AUM",
    "Live SIP As % of Total AUM",
    "PMS AUM as % of Total AUM",
    "SIP Closure to Net SIP %",
    "MARS AUM as % Total AUM",
    "MARS Net Sales as % Equity Sales"
]

# ================= ADD RATIO KPIs =================
def add_ratio_kpis(df):
    def safe_col(name):
        return df[name] if name in df.columns else pd.Series(0, index=df.index)

    df["Total AUM Combined"] = (
        safe_col("Total AUM FY 23-24 Q4 YTD").fillna(0)
        + safe_col("Total AUM FY 24-25 Q4 YTD").fillna(0)
    )
    total_aum = df["Total AUM Combined"].replace(0, np.nan)

    df["Equity Sales as % of Total AUM"] = np.divide(
        (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")) * 100, total_aum
    )

    df["Live SIP As % of Total AUM"] = np.divide(
        (safe_col("LIVE SIP FY 23-24 Q4 YTD") + safe_col("LIVE SIP FY 24-25 Q4 YTD")) * 100, total_aum
    )

    df["PMS AUM as % of Total AUM"] = np.divide(
        (safe_col("PMS AUM FY 23-24 YTD") + safe_col("PMS AUM FY 24-25 YTD")) * 100, total_aum
    )

    df["SIP Closure to Net SIP %"] = np.divide(
        (safe_col("SIP Closure FY 23-24 Q4 YTD") + safe_col("SIP Closure FY 24-25 Q4 YTD")) * 100,
        (safe_col("Net SIP FY 23-24 YTD") + safe_col("Net SIP FY 24-25 YTD")).replace(0, np.nan),
    )

    df["MARS AUM as % Total AUM"] = np.divide(
        (safe_col("MARS AUM FY 23-24 YTD") + safe_col("MARS AUM FY 24-25 YTD")) * 100, total_aum
    )

    df["MARS Net Sales as % Equity Sales"] = np.divide(
        (safe_col("MARS Net Sales FY 23-24 YTD") + safe_col("MARS Net Sales FY 24-25 YTD")) * 100,
        (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")).replace(0, np.nan),
    )

    return df

# ================= IDENTIFY PERFORMANCE GAPS =================
def identify_focus_areas(df):
    focus_records = []
    kpi_avg = df[kpi_cols].mean()

    for (zm, bm, partner), group in df.groupby([zm_col, bm_col, partner_col]):
        partner_row = group.iloc[-1]  # latest record
        gaps = {}
        for kpi in kpi_cols:
            possible_targets = [col for col in df.columns if target_col_suffix in col and kpi in col]
            target_value = partner_row[possible_targets[0]] if possible_targets else kpi_avg[kpi]
            gap_pct = ((target_value - partner_row[kpi]) / target_value) * 100 if target_value and not pd.isna(target_value) else 0
            gaps[kpi] = round(gap_pct, 2)

        max_gap = max(gaps.values())
        sorted_gaps = sorted(gaps.items(), key=lambda x: x[1], reverse=True)

        focus_records.append({
            zm_col: zm,
            bm_col: bm,
            partner_col: partner,
            "Focus KPIs": sorted_gaps,
            "Max Gap": max_gap
        })

    return pd.DataFrame(focus_records)

# ================= FILTER TOP HIERARCHY =================
def get_top_hierarchy(focus_df):
    top_zms = focus_df.groupby(zm_col)["Max Gap"].max().sort_values(ascending=False).head(top_zm).index
    final_records = []
    for zm in top_zms:
        zm_data = focus_df[focus_df[zm_col] == zm]
        top_bms = zm_data.groupby(bm_col)["Max Gap"].max().sort_values(ascending=False).head(top_bm).index
        for bm in top_bms:
            bm_data = zm_data[zm_data[bm_col] == bm]
            top_partners_names = bm_data.groupby(partner_col)["Max Gap"].max().sort_values(ascending=False).head(top_partners).index
            final_records.append(bm_data[bm_data[partner_col].isin(top_partners_names)])
    return pd.concat(final_records)

# ================= GENERATE INSIGHTS =================
def generate_zm_focus_insights(focus_df):
    zm_insights = {}
    for zm, zm_group in focus_df.groupby(zm_col):
        zm_insights[zm] = {}
        for bm, bm_group in zm_group.groupby(bm_col):
            zm_insights[zm][bm] = {}
            for _, row in bm_group.iterrows():
                partner = row[partner_col]
                focus_kpis = row["Focus KPIs"]
                prompt = f"""
You are a senior business analyst.
Analyze performance gaps for partner '{partner}' under Branch Manager '{bm}' in Zonal Manager '{zm}'.

Focus KPIs and % gaps:
{focus_kpis}

Your task:
- Identify top 2–3 KPIs with largest gaps.
- Suggest realistic improvement targets (X%) and estimated partner-level gain.
- Estimate total potential BM/ZM performance improvement (Y%).
- Provide one actionable recommendation for ZM/ BM.

Output in concise bullet points only.
"""
                try:
                    resp = model.generate_content(
                        prompt,
                        generation_config={"temperature": 0.7, "top_p": 0.9, "top_k": 40, "max_output_tokens": 800}
                    )
                    zm_insights[zm][bm][partner] = resp.text.strip()
                except Exception as e:
                    zm_insights[zm][bm][partner] = f"Error generating insight: {e}"
    return zm_insights

# ================= EXECUTION =================
merged_with_hierarchyy = add_ratio_kpis(merged_with_hierarchy_mis)
focus_df = identify_focus_areas(merged_with_hierarchyy)
filtered_df = get_top_hierarchy(focus_df)
zm_focus_insights = generate_zm_focus_insights(filtered_df)

# ================= DISPLAY =================
print("\n================= ZM → BM → TOP PARTNERS FOCUS =================\n")
print(filtered_df.head(10))

print("\n================= ZM INSIGHTS =================\n")
for zm, bms in zm_focus_insights.items():
    print(f"\n######## Zonal Manager: {zm} ########\n")
    for bm, partners in bms.items():
        print(f"--- Branch Manager: {bm} ---\n")
        for partner, insight in partners.items():
            print(f"Partner: {partner}\n{insight}\n")
        print("-" * 100)
    print("=" * 120)


  safe_col("Total AUM FY 23-24 Q4 YTD").fillna(0)
  + safe_col("Total AUM FY 24-25 Q4 YTD").fillna(0)
  (safe_col("Net SIP FY 23-24 YTD") + safe_col("Net SIP FY 24-25 YTD")).replace(0, np.nan),
  (safe_col("Equity Net Sales FY 23-24 YTD") + safe_col("Equity Net Sales FY 24-25 YTD")).replace(0, np.nan),




                           ZM                BM  \
25408  SARFARAZ ABDULLA PATEL    RAVI KACHIWALA   
25596  SARFARAZ ABDULLA PATEL    RAVI KACHIWALA   
19912  SARFARAZ ABDULLA PATEL   AJAY KUMAR SONI   
19925  SARFARAZ ABDULLA PATEL   AJAY KUMAR SONI   
9562            KOUSHIK GHOSH  MANORANJAN SAHOO   
9568            KOUSHIK GHOSH  MANORANJAN SAHOO   
7562            KOUSHIK GHOSH    ABHISHEK GHOSH   
7598            KOUSHIK GHOSH    ABHISHEK GHOSH   

                      Partner Name_x  \
25408  KALPESH PRAVINCHANDRA DIVETIA   
25596                    SAKSHI JAIN   
19912                   VIKAS AGRWAL   
19925                  Yasho Wardhan   
9562             Simanchala Tripathy   
9568             Susanta Kumar Panda   
7562                   Debasish Ball   
7598            MANORANJAN MUKHERJEE   

                                              Focus KPIs   Max Gap  
25408  [(MARS Net Sales as % Equity Sales, 151.91), (...    151.91  
25596  [(Equity Sales as % of Total AUM

In [218]:
from sqlalchemy import create_engine, text
import pandas as pd

# ---------------- MYSQL CONNECTION ----------------
engine = create_engine("mysql+pymysql://root:1234@localhost:3306/Insights")

# ---------------- SAVE ZM FOCUS INSIGHTS ----------------
def save_zm_focus_insights_to_mysql(zm_focus_insights, insight_type="Area to Focus"):
    """
    Saves ZM → BM → Partner-level insights into MySQL.
    Each record includes: ZM | BM | Partner_Name | Insight_Type | Insight
    """

    records = []

    # Flatten nested structure
    for zm, bm_dict in zm_focus_insights.items():
        for bm, partners_dict in bm_dict.items():
            for partner, insight_text in partners_dict.items():
                records.append({
                    "ZM": zm,
                    "BM": bm,
                    "Partner_Name": partner,
                    "Insight_Type": insight_type,
                    "Insight": insight_text
                })

    df_to_save = pd.DataFrame(records)

    if df_to_save.empty:
        print("No insights to save — DataFrame is empty.")
        return

    # Reorder columns for consistency
    df_to_save = df_to_save[["ZM", "BM", "Partner_Name", "Insight_Type", "Insight"]]

    try:
        # Check existing tables
        with engine.connect() as conn:
            tables = conn.execute(text("SHOW TABLES")).fetchall()
            existing_tables = [t[0] for t in tables]
            print(f"📋 Existing tables in DB: {existing_tables}")

        # Save data to MySQL
        df_to_save.to_sql(
            name="AllInsights",       
            con=engine,
            if_exists="append",       
            index=False
        )

        print(f" {len(df_to_save)} ZM focus insights saved successfully into 'AllInsights'.")

    except Exception as e:
        print(f" Error saving insights to MySQL: {e}")


# ---------------- EXECUTE SAVE ----------------
save_zm_focus_insights_to_mysql(zm_focus_insights)


📋 Existing tables in DB: ['allinsights', 'insights_data', 'partnerfocusinsights']
 8 ZM focus insights saved successfully into 'AllInsights'.


  df_to_save.to_sql(


In [None]:
## ZM - Partners changing course / drastic change in performance  

In [152]:
import pandas as pd
import numpy as np
import google.generativeai as genai

# ================= GEMINI SETUP =================
genai.configure(api_key=google_api_key)

# ================= GEMINI INSIGHT GENERATOR =================
class DeepInsightGenerator:
    """Uses Gemini LLM to generate descriptive & prescriptive insights dynamically."""
    def __init__(self, model_name="gemini-2.0-flash"):
        self.model = genai.GenerativeModel(model_name)
    
    def generate_insight(self, prompt: str) -> str:
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            return f"Error generating insight: {e}"

# ================= DATA CLEANING =================
def clean_columns(df):
    df.columns = (
        df.columns
        .str.replace('\n', ' ', regex=True)
        .str.replace('\r', '', regex=True)
        .str.strip()
    )
    return df

# ================= KPI CALCULATIONS =================
def add_ratio_kpis(df):
    """Adds ratio KPIs using combined Total AUM from FY23–24 and FY24–25 safely."""
    df["Total AUM Combined"] = (
        df.get("Total AUM FY 23-24 Q4 YTD", 0) + df.get("Total AUM FY 24-25 Q4 YTD", 0)
    )
    df["Total AUM Combined"] = df["Total AUM Combined"].replace(0, np.nan)

    df["Equity Sales as % of Total AUM"] = (
        (df.get("Equity Net Sales FY 23-24 YTD", 0) + df.get("Equity Net Sales FY 24-25 YTD", 0))
        / df["Total AUM Combined"] * 100
    )

    df["Live SIP As % of Total AUM"] = (
        (df.get("LIVE SIP FY 23-24 Q4 YTD", 0) + df.get("LIVE SIP FY 24-25 Q4 YTD", 0))
        / df["Total AUM Combined"] * 100
    )

    df["PMS AUM as % of Total AUM"] = (
        (df.get("PMS AUM FY 23-24 YTD", 0) + df.get("PMS AUM FY 24-25 YTD", 0))
        / df["Total AUM Combined"] * 100
    )

    df["SIP Closure to Net SIP %"] = np.where(
        (df.get("Net SIP FY 23-24 YTD", 0) + df.get("Net SIP FY 24-25 YTD", 0)) == 0,
        np.nan,
        (df.get("SIP Closure FY 23-24 Q4 YTD", 0) + df.get("SIP Closure FY 24-25 Q4 YTD", 0))
        / (df.get("Net SIP FY 23-24 YTD", 0) + df.get("Net SIP FY 24-25 YTD", 0)) * 100
    )

    df["MARS AUM as % Total AUM"] = (
        (df.get("MARS AUM FY 23-24 YTD", 0) + df.get("MARS AUM FY 24-25 YTD", 0))
        / df["Total AUM Combined"] * 100
    )

    df["MARS Net Sales as % Equity Sales"] = np.where(
        (df.get("Equity Net Sales FY 23-24 YTD", 0) + df.get("Equity Net Sales FY 24-25 YTD", 0)) == 0,
        np.nan,
        (df.get("MARS Net Sales FY 23-24 YTD", 0) + df.get("MARS Net Sales FY 24-25 YTD", 0))
        / (df.get("Equity Net Sales FY 23-24 YTD", 0) + df.get("Equity Net Sales FY 24-25 YTD", 0)) * 100
    )

    df = df.fillna(0).replace([float('inf'), float('-inf')], 0)
    return df

# ================= PARTNER CONCENTRATION =================
thresholds = [25, 50, 75, 80, 90]

def partner_concentration(df, kpi, thresholds=thresholds):
    df_sorted = (
        df.groupby('Partner Name')[kpi]
        .sum()
        .sort_values(ascending=False)
        .reset_index()
    )

    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100

    total_partners = len(df_sorted)
    conc_summary = {}

    for t in thresholds:
        num_partners = (df_sorted['Cumulative KPI %'] >= t).idxmax() + 1
        conc_summary[f'Partners for {t}%'] = num_partners
        conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
 
    return conc_summary, total_partners, df_sorted

# ================= ZONAL MANAGER HIERARCHY =================
def top_zonal_manager_concentration(df, kpi, top_zms=2, top_bms=2, top_partners=2, thresholds=thresholds):
    zm_dict = {}

    # Identify Top & Bottom ZMs
    zm_performance = (
        df.groupby('ZM')[kpi]
        .sum()
        .sort_values(ascending=False)
    )
    top_zms_list = zm_performance.head(top_zms).index.tolist()
    bottom_zms_list = zm_performance.tail(top_zms).index.tolist()

    for zm_group_list, category in [(top_zms_list, "Top ZMs"), (bottom_zms_list, "Bottom ZMs")]:
        zm_dict[category] = {}
        for zm in zm_group_list:
            zm_group = df[df['ZM'] == zm]
            zm_summary = {}

            # Identify Top & Bottom BMs under each ZM
            bm_perf = zm_group.groupby('BM')[kpi].sum().sort_values(ascending=False)
            top_bms_list = bm_perf.head(top_bms).index.tolist()
            bottom_bms_list = bm_perf.tail(top_bms).index.tolist()

            bm_dict = {"Top BMs": {}, "Bottom BMs": {}}

            for bm_list, label in [(top_bms_list, "Top BMs"), (bottom_bms_list, "Bottom BMs")]:
                for bm in bm_list:
                    bm_group = zm_group[zm_group['BM'] == bm]
                    conc_summary, total_partners, df_sorted = partner_concentration(bm_group, kpi, thresholds)
                    zm_total = zm_group[kpi].sum()
                    bm_total = bm_group[kpi].sum()
                    bm_contribution_pct = round(bm_total / zm_total * 100, 2) if zm_total else 0

                    bm_dict[label][bm] = {
                        "Total Partners": total_partners,
                        "Branch Contribution %": bm_contribution_pct,
                        "Partner Concentration Summary": conc_summary,
                        "Top Partners": df_sorted.head(top_partners)[['Partner Name', kpi, 'Cumulative KPI %']].to_dict(orient='records'),
                        "Bottom Partners": df_sorted.tail(top_partners)[['Partner Name', kpi, 'Cumulative KPI %']].to_dict(orient='records')
                    }

            zm_dict[category][zm] = bm_dict

    return zm_dict

# ================= ZM INSIGHT GENERATION =================
def generate_zonal_manager_insights(df, kpi_list, generator, top_zms=2, top_bms=2, top_partners=2):
    zm_all_insights = {}

    for kpi in kpi_list:
        summary = top_zonal_manager_concentration(df, kpi, top_zms, top_bms, top_partners)

        for category, zms in summary.items():
            for zm, bm_data in zms.items():
                text_summary = f"\n########## {zm} | {category} | {kpi} ##########\n"
                for bm_group, bm_info_dict in bm_data.items():
                    text_summary += f"\n=== {bm_group.upper()} ===\n"
                    for bm, bm_info in bm_info_dict.items():
                        conc = bm_info["Partner Concentration Summary"]
                        total = bm_info["Total Partners"]
                        branch_pct = bm_info["Branch Contribution %"]

                        parts = []
                        for t in thresholds:
                            partners = conc.get(f'Partners for {t}%', '-')
                            pct_total = conc.get(f'% of Total Partners for {t}%', '-')
                            parts.append(f"{partners} partners ({pct_total}% of total) drive {t}% of {kpi}")

                        text_summary += (
                            f"Branch Manager: {bm} ({branch_pct}% of total zone {kpi}, {total} partners)\n"
                            + " → " + ", ".join(parts) + ".\n"
                        )

                        text_summary += "\nTop Partners:\n"
                        for partner in bm_info["Top Partners"]:
                            text_summary += f"  - {partner['Partner Name']}: {partner[kpi]:,.2f} ({partner['Cumulative KPI %']:.2f}%)\n"

                        text_summary += "\nBottom Partners:\n"
                        for partner in bm_info["Bottom Partners"]:
                            text_summary += f"  - {partner['Partner Name']}: {partner[kpi]:,.2f} ({partner['Cumulative KPI %']:.2f}%)\n"

                prompt = f"""
                You are a zonal performance analyst.
                Below is structured data for Zonal Manager '{zm}' showing top/bottom BMs and partners for KPI '{kpi}'.
                Generate insights (200–300 words) highlighting:
                1. Top and bottom performers with % contribution figures,
                2. Key performance gaps and dependencies,
                3. Specific reasons behind performance skew,
                4. Actionable recommendations to improve laggers and reduce risk.

                Data Summary:
                {text_summary}
                """

                insight = generator.generate_insight(prompt)
                zm_all_insights.setdefault(category, {})[zm] = {
                    "raw_text": text_summary,
                    "llm_insight": insight
                }

    return zm_all_insights

# ================= SAMPLE EXECUTION =================
# df = clean_columns(merged_with_hierarchy_mis)
# df = add_ratio_kpis(df)
# generator = DeepInsightGenerator()
# kpi_list = ["Equity Sales as % of Total AUM"]
# insights = generate_zonal_manager_insights(df, kpi_list, generator, top_zms=2, top_bms=2, top_partners=2)
# print(insights)


In [153]:
df = clean_columns(merged_with_hierarchy_mis)
df = add_ratio_kpis(df)

generator = DeepInsightGenerator()

# Choose which KPIs you want to analyze
kpi_list = [
    "Equity Sales as % of Total AUM",
    "Live SIP As % of Total AUM",
    "MARS Net Sales as % Equity Sales",
    "PMS AUM as % of Total AUM",
    "SIP Closure to Net SIP %",
    "MARS AUM as % Total AUM",
    "MARS Net Sales as % Equity Sales",
    "Equity Sales",
    "SIP Sales Achievement",
    "Net Sales through MARS",
    "Investment Net Sales Achievement",
    "Client Acquisition_% Achievement",
    "LAS_% Achievement"
]

# Generate insights
zm_all_insights = generate_zonal_manager_insights(df, kpi_list, generator)

# Print insights nicely
for zm, kpis in zm_all_insights.items():
    for kpi, details in kpis.items():
        print(f"\n\n########## {zm} | {kpi} ##########\n")
        print(details["llm_insight"])



########## Top ZMs | SARFARAZ ABDULLA PATEL ##########

## Zonal Performance Analysis - SARFARAZ ABDULLA PATEL - LAS_% Achievement

**Overall Performance & Key Contributors:**

Sarfaraz Abdulla Patel's zone demonstrates a skewed LAS_% Achievement distribution. Top BMs, Shailendrakumar Avdheshkumar Mishra (5.6%) and Abhishek Sengar (5.59%), contribute significantly to the total zonal achievement.  Within these top-performing branches, a relatively small number of partners (around 15-18%) drive a substantial portion (50%) of the LAS_% Achievement, indicating a high reliance on key performers. In contrast, Bottom BMs Namita Bhamre (0.16%) and Kalyani Ramesh Dongre (0.0%) lag significantly behind. For both BMs, a single partner represents a large chunk of achievement.

**Performance Gaps & Dependencies:**

A clear performance gap exists between the top and bottom BMs, with a significant disparity in LAS_% Achievement. The performance of both top and bottom BMs is heavily dependent on the

In [132]:
merged_with_hierarchy_mis = merged_with_hierarchy_mis.rename(
    columns={
        'Partner Name_x': 'Partner Name',
        'Partner Name_y': 'Partner Name_X',
        'Center_x': 'Center',
        'Center_y': 'Center_X',
        'Category_x': 'Category',
        'Category_y': 'Category_X',
        'FY_Year_x': 'FY_Year',
    }
)


In [180]:
merged_df.head(2)

Unnamed: 0,Sr No.,Partner Code,Partner Name,Center_x,Category,Relationship Handler,Investment Net Sales Target,Investment Net Sales Achievement,Investment Net Sales % Achievement,Equity Sales,MIP Sales,Gold Sales,Sales in Physical Assets,Sales in Direct Equity,FD + Bond (Primary Market) Sales,Secondary Market Bond Sales,Net Sales Through Realty,Net NJ PMS Sales,Net Non-NJ PMS Sales,Net Sales through MARS,SIP Sales Target,SIP Sales Achievement,SIP Sales % Achievement,Fresh Gross SIP Sales,SIP Closure / Termination,FY_Year,Sr No,Broker Code,Doer Name,Doer Type,Center_y,Equity Net Sales_Target,Equity Net Sales_Achievement,Equity Net Sales_% Achievement,Insurance_Target,Insurance_Achievement,Insurance_% Achievement,SIP Sales_Target,SIP Sales_Achievement,SIP Sales_% Achievement,Client Acquisition_Target,Client Acquisition_Achievement,Client Acquisition_% Achievement,LAS_Target,LAS_Achievement,LAS_% Achievement,SIP to Net Sales Ratio_SIP Input Value,SIP to Net Sales Ratio_Ratio,Total % Achievement,Total_Performance
0,1,23676,ALOKE CHATTERJEE,24 SOUTH PARGANA,NON D,SUBRATA MAITY,1500000,104696.25,6.98,74996.25,0,0,0,0,0,0,0,0,0,29700,33750,8624.59,25.55,8624.59,0.0,2023,1,23676,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,1500000,104696.25,6.98,,,,33750,8624.59,25.55,20,7.5,37.5,2.22,0,0,4999.76,1.67,17.82,5954.006
1,2,20361,ARINDAM CHAKRAVARTI,24 SOUTH PARGANA,NON D,SUBRATA MAITY,3360000,1458896.25,43.42,5246.25,0,0,0,0,0,0,0,0,0,1453650,75600,62996.85,83.33,215239.24,152242.39,2023,2,20361,SUBRATA MAITY,Fundz Express,24 SOUTH PARGANA,3360000,1458896.25,43.42,34650.0,963.32,2.78,75600,62996.85,83.33,40,15.0,37.5,907.03,0,0,111994.42,37.33,41.04,290762.85


##### CODE (SKIP)

##### Partner Concentration

In [95]:
##### Partner Concentration
import pandas as pd

kpi_list = ['Equity Sales', 'SIP Sales Achievement', 
            'Net Sales through MARS', 'Investment Net Sales Achievement']

thresholds = [25, 50, 75, 80, 90]  

def partner_concentration(df, kpi, thresholds=thresholds):
    df_sorted = df.groupby('Partner Name')[kpi].sum().sort_values(ascending=False).reset_index()
    total_kpi = df_sorted[kpi].sum()
    df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
    df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100
    total_partners = len(df_sorted)
    
    conc_summary = {}
    for t in thresholds:
        num_partners = df_sorted[df_sorted['Cumulative KPI %'] <= t].shape[0]
        conc_summary[f'Partners for {t}%'] = num_partners
        conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
    
    return conc_summary, total_partners

# Generate Deep Insights

def generate_partner_concentration_deep_insights(df, kpi_list):
    all_insights = {}
    
    for kpi in kpi_list:
        conc_summary, total_partners = partner_concentration(df, kpi)
        
      
        insight_text = f"Partner Concentration Insight for KPI '{kpi}':\n"
        for t in thresholds:
            insight_text += f" - Top {conc_summary[f'Partners for {t}%']} partners " \
                            f"({conc_summary[f'% of Total Partners for {t}%']}% of total partners) " \
                            f"contribute {t}% of total {kpi}.\n"
        
        insight_text += "\nDeep Insights:\n"
        
        #  concentration level
        pct_80_share = conc_summary.get('% of Total Partners for 80%', None)
        if pct_80_share is not None:
            if pct_80_share <= 10:
                insight_text += f" - Highly concentrated: only {pct_80_share}% of partners contribute 80% of {kpi}. Business depends heavily on top performers.\n"
            elif pct_80_share <= 25:
                insight_text += f" - Moderately concentrated: top {pct_80_share}% of partners contribute 80% of {kpi}. Some dependency on key partners exists.\n"
            else:
                insight_text += f" - Low concentration: contributions are more evenly spread across partners.\n"
        
       
        insight_text += f" - Opportunity: Upskill mid- and low-tier partners to reduce dependency on top {pct_80_share}% partners.\n"
        insight_text += f" - Risk: Losing a few top partners could significantly impact {kpi}.\n"
        
        all_insights[kpi] = insight_text
    
    return all_insights


partner_concentration_insights = generate_partner_concentration_deep_insights(merged_df, kpi_list)

for kpi, insight in partner_concentration_insights.items():
    print(f"\n=== {kpi} ===")
    print(insight)
    print("\n"+"="*100)



=== Equity Sales ===
Partner Concentration Insight for KPI 'Equity Sales':
 - Top 264 partners (0.68% of total partners) contribute 25% of total Equity Sales.
 - Top 1044 partners (2.69% of total partners) contribute 50% of total Equity Sales.
 - Top 2804 partners (7.22% of total partners) contribute 75% of total Equity Sales.
 - Top 3391 partners (8.73% of total partners) contribute 80% of total Equity Sales.
 - Top 5052 partners (13.01% of total partners) contribute 90% of total Equity Sales.

Deep Insights:
 - Highly concentrated: only 8.73% of partners contribute 80% of Equity Sales. Business depends heavily on top performers.
 - Opportunity: Upskill mid- and low-tier partners to reduce dependency on top 8.73% partners.
 - Risk: Losing a few top partners could significantly impact Equity Sales.



=== SIP Sales Achievement ===
Partner Concentration Insight for KPI 'SIP Sales Achievement':
 - Top 400 partners (1.03% of total partners) contribute 25% of total SIP Sales Achievement.


### Partners - Leaders & Laggers

In [97]:
import pandas as pd
import concurrent.futures
import google.generativeai as genai
import markdown

class DeepInsightGenerator:
    """
    Generates deep, descriptive, and prescriptive insights
    for KPIs across hierarchical dimensions, including Leaders & Laggers analysis.
    """

    def __init__(self, gemini_api_key):
        self.gemini_api_key = gemini_api_key
        genai.configure(api_key=gemini_api_key)
        self.model = genai.GenerativeModel("gemini-2.0-flash")

    def identify_leaders_laggers(self, df, kpi, top_n=5):
        """
        Returns top N leaders and bottom N laggers with KPI contributions.
        """
        df_grouped = df.groupby('Partner Name')[kpi].sum().reset_index()
        total_kpi = df_grouped[kpi].sum()

        # Leaders
        leaders = df_grouped.sort_values(by=kpi, ascending=False).head(top_n).copy()
        leaders['Contribution %'] = (leaders[kpi] / total_kpi * 100).round(2)

        # Laggers
        laggers = df_grouped.sort_values(by=kpi, ascending=True).head(top_n).copy()
        laggers['Contribution %'] = (laggers[kpi] / total_kpi * 100).round(2)

        return leaders, laggers, total_kpi

    def generate_leaders_laggers_prompts(self, leaders, laggers, df, kpi, kpi_list):
        """
        Generates descriptive and prescriptive prompts for LLM.
        """
        def build_context(df_subset, role):
            text = ""
            for _, row in df_subset.iterrows():
                partner = row['Partner Name']
                pct = row['Contribution %']
                other_kpis = df[df['Partner Name']==partner][kpi_list].sum().to_dict()
                text += f"""
Partner: {partner}
Role: {role}
KPI: {kpi} Contribution: {pct}%
Other KPIs: {other_kpis}
"""
            return text

        leaders_context = build_context(leaders, "Leader")
        laggers_context = build_context(laggers, "Lagger")

        descriptive_prompt = f"""
Analyze the following partners for KPI '{kpi}':

Leaders:
{leaders_context}

Laggers:
{laggers_context}

Produce deep, descriptive insights for each partner:
- Why they are leaders/laggers
- Their impact on overall KPI performance
- Patterns in their performance across KPIs
- Areas of strength and weakness
"""

        prescriptive_prompt = f"""
Based on the partners' KPI contributions for '{kpi}':
{leaders_context}
{laggers_context}

Generate actionable, prescriptive insights:
- How leaders can sustain or improve performance
- How laggers can improve, focus areas, and interventions
- Highlight potential concentration risks or opportunities
"""

        return descriptive_prompt, prescriptive_prompt
        
    def run_gemini(self, prompt):
        try:
            resp = self.model.generate_content(
                prompt,
                generation_config={
                    "temperature": 0.6,
                    "top_p": 0.9,
                    "top_k": 40,
                    "max_output_tokens": 1024
                }
            )
            return resp.text.strip()
        except Exception as e:
            raise Exception(f"Gemini generation failed: {e}")

    def create_insight_dict(self, insight_text, variable, insight_type):
        return {
            "insight_set_type": insight_type,
            "insight_set": [
                {
                    "text": insight_text,
                    "variable": variable
                }
            ]
        }

    def generate_insights(self, df, kpi, kpi_list, top_n=5):
        # 1. Identify leaders & laggers
        leaders, laggers, total_kpi = self.identify_leaders_laggers(df, kpi, top_n=top_n)
        # 2. Generate LLM prompts
        descriptive_prompt, prescriptive_prompt = self.generate_leaders_laggers_prompts(
            leaders, laggers, df, kpi, kpi_list
        )
        # 3. Call Gemini
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future_desc = executor.submit(self.run_gemini, descriptive_prompt)
            future_pres = executor.submit(self.run_gemini, prescriptive_prompt)
            descriptive_insights = future_desc.result()
            prescriptive_insights = future_pres.result()
        # 4. Prepare insight dictionary
        return [
            self.create_insight_dict(descriptive_insights, kpi, "descriptive"),
            self.create_insight_dict(prescriptive_insights, kpi, "prescriptive")
        ]


In [100]:
kpi_list = ['Equity Sales', 'SIP Sales Achievement', 'Net Sales through MARS', 'Investment Net Sales Achievement']
gemini_api_key = google_api_key

generator = DeepInsightGenerator(gemini_api_key)

all_insights = {}
for kpi in kpi_list:
    print(f"Generating insights for KPI: {kpi}")
    all_insights[kpi] = generator.generate_insights(merged_df, kpi, kpi_list, top_n=5)

# Inspect
for kpi, insights in all_insights.items():
    print(f"\n=== {kpi} ===")
    for insight in insights:
        print(insight["insight_set"][0]["text"])
        print("-"*80)


Generating insights for KPI: Equity Sales
Generating insights for KPI: SIP Sales Achievement
Generating insights for KPI: Net Sales through MARS
Generating insights for KPI: Investment Net Sales Achievement

=== Equity Sales ===
Okay, let's dive into an analysis of each partner, identifying their strengths, weaknesses, and impact on the overall Equity Sales KPI.  I'll organize this by Leader and Lagger categories as provided.

**Leaders Analysis**

These partners are driving positive Equity Sales. Let's look at each in detail:

**1. MARZEE MAIDHYOMAH KERAWALA**

*   **Why a Leader:**  Marzee is the clear leader by a significant margin. Their Equity Sales Contribution (0.906%) is almost double that of the next best performer.  Their absolute Equity Sales value (12.46 Billion) is massive.
*   **Impact on Overall KPI:**  Marzee is a *major* contributor to overall positive Equity Sales.  Their performance likely significantly offsets the negative contributions from the laggers.
*   **Patte

### Partners changing course / drastic change in performance

In [102]:
import pandas as pd
import numpy as np
import concurrent.futures
import google.generativeai as genai

kpi_cols = ['Equity Sales', 'SIP Sales Achievement', 'Net Sales through MARS', 'Investment Net Sales Achievement']
threshold_pct_change = 50  
year_col = 'FY_Year'
partner_col = 'Partner Name'
max_partners = 3  

gemini_api_key = google_api_key
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

def detect_drastic_changes(df):
    df_sorted = df.sort_values([partner_col, year_col])
    change_records = []

    for partner, group in df_sorted.groupby(partner_col):
        group = group.sort_values(year_col)
        prev_row = None
        for _, row in group.iterrows():
            if prev_row is not None:
                for kpi in kpi_cols:
                    prev_val = prev_row[kpi]
                    curr_val = row[kpi]
                    if prev_val != 0:
                        pct_change = ((curr_val - prev_val) / abs(prev_val)) * 100
                    else:
                        pct_change = np.nan
                    if pd.notna(pct_change) and abs(pct_change) >= threshold_pct_change:
                        change_records.append({
                            'Partner Name': partner,
                            'KPI': kpi,
                            'Year From': prev_row[year_col],
                            'Year To': row[year_col],
                            'Previous Value': prev_val,
                            'Current Value': curr_val,
                            '% Change': round(pct_change, 2)
                        })
            prev_row = row
    return pd.DataFrame(change_records)

def generate_llm_insights(change_df):
    # Take top max_partners partners with largest changes
    top_partners = change_df.groupby('Partner Name')['% Change'].max().sort_values(ascending=False).head(max_partners).index
    insights_text = ""

    for partner in top_partners:
        partner_data = change_df[change_df['Partner Name'] == partner]
        prompt = f"""
You are an expert data analyst. Analyze the following KPI changes for the partner '{partner}' and generate deep, insightful, 
LLM-style narrative on performance. Focus on patterns, impact, and actionable guidance. Avoid hardcoding phrases; generate based on data.

Data:
{partner_data.to_dict(orient='records')}
"""
        try:
            resp = model.generate_content(
                prompt,
                generation_config={
                    "temperature": 0.7,
                    "top_p": 0.9,
                    "top_k": 40,
                    "max_output_tokens": 1024
                }
            )
            insights_text += f"\nPartner: {partner}\n"
            insights_text += resp.text + "\n" + "-"*80 + "\n"
        except Exception as e:
            insights_text += f"Error generating insights for {partner}: {e}\n"
    return insights_text

change_df = detect_drastic_changes(merged_df)
partner_insights = generate_llm_insights(change_df)
print(partner_insights)



Partner: VIJAYKUMAR MURALIDHAR PATIL
Analyzing the performance data for VIJAYKUMAR MURALIDHAR PATIL reveals a pattern of substantial growth across key performance indicators (KPIs) over the periods examined.

Initially, from 2023 to 2024, we observe a significant surge in 'Equity Sales', demonstrating a growth of 224.24%. This considerable increase suggests a strong ability to attract investors to equity products, possibly driven by effective market strategies or a change in client acquisition tactics. Simultaneously, the 'Investment Net Sales Achievement' experienced an 83.56% rise, reinforcing the notion of improved overall investment product sales. Most strikingly, 'SIP Sales Achievement' displays an astronomical increase of 139243116.67%. This enormous percentage change, though starting from a small base, signifies a remarkable shift towards Systematic Investment Plans, potentially indicating a successful promotion or increased client interest in this investment method.

Continuin

### Partners and areas/business dsnumbers to focus on to improve performance optimally

In [103]:
import pandas as pd
import numpy as np
import concurrent.futures
import google.generativeai as genai

kpi_cols = ['Equity Sales', 'SIP Sales Achievement', 'Net Sales through MARS', 'Investment Net Sales Achievement']
partner_col = 'Partner Name'
target_col_suffix = 'Target'  
max_partners = 3  

gemini_api_key = google_api_key  
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")

def identify_focus_areas(df):
    """
    For each partner, identify KPI areas where performance is below target or relative to average.
    Returns a DataFrame of gaps for top N partners.
    """
    focus_records = []
    kpi_avg = df[kpi_cols].mean()

    for partner, group in df.groupby(partner_col):
        partner_row = group.iloc[-1]  
        gaps = {}
        for kpi in kpi_cols:
            target_col = None
            possible_targets = [col for col in df.columns if target_col_suffix in col and kpi in col]
            if possible_targets:
                target_col = possible_targets[0]
                target_value = partner_row[target_col]
            else:
                target_value = kpi_avg[kpi]
            gap_pct = ((target_value - partner_row[kpi]) / target_value) * 100 if target_value else 0
            gaps[kpi] = gap_pct

        sorted_gaps = sorted(gaps.items(), key=lambda x: x[1], reverse=True)
        focus_records.append({
            'Partner Name': partner,
            'Focus KPIs': sorted_gaps
        })

    focus_df = pd.DataFrame(focus_records)
    focus_df['Max Gap'] = focus_df['Focus KPIs'].apply(lambda x: x[0][1])
    focus_df = focus_df.sort_values('Max Gap', ascending=False).head(max_partners)
    return focus_df

def generate_partner_focus_insights(focus_df):
    """
    Generates deep LLM-style insights per partner on areas to focus on for optimal performance.
    """
    insights_text = ""

    for _, row in focus_df.iterrows():
        partner = row['Partner Name']
        focus_kpis = row['Focus KPIs']
        prompt = f"""
You are an expert business analyst. Analyze the performance gaps for the partner '{partner}'.
Focus on these KPIs with underperformance:
{focus_kpis}

Generate a detailed, LLM-style narrative insight, covering:
- Which KPIs or business areas the partner should focus on
- How improvement will impact overall performance
- Recommended actions to improve performance
Avoid hardcoding phrases; generate insights based on the data provided.
"""
        try:
            resp = model.generate_content(
                prompt,
                generation_config={
                    "temperature": 0.7,
                    "top_p": 0.9,
                    "top_k": 40,
                    "max_output_tokens": 1024
                }
            )
            insights_text += f"\nPartner: {partner}\n"
            insights_text += resp.text + "\n" + "-"*80 + "\n"
        except Exception as e:
            insights_text += f"Error generating insights for {partner}: {e}\n"
    return insights_text

focus_df = identify_focus_areas(merged_df)
partner_focus_insights = generate_partner_focus_insights(focus_df)
print(partner_focus_insights)



Partner: Amish Maheshbhai Shah
Okay, let's analyze the performance gaps for partner Amish Maheshbhai Shah, focusing on the provided KPIs: Equity Sales, Investment Net Sales Achievement, Net Sales through MARS, and SIP Sales Achievement. It's clear from the data that there's significant room for improvement across multiple areas of their business.

Amish's performance is notably lagging in several key areas that contribute to overall business success. The most pressing concerns appear to be in **SIP Sales Achievement**, where the partner is significantly underperforming with a value of -121.94. This indicates not only a failure to meet targets but also potentially a decline in SIP sales compared to the baseline or previous periods. This negative performance directly impacts the firm's recurring revenue stream and long-term asset accumulation, which are crucial for sustainable growth. Correcting this issue should be the top priority.

Secondly, **Investment Net Sales Achievement** shows

### combine

In [104]:
import pandas as pd
import numpy as np
import google.generativeai as genai

class PartnerPerformanceAnalyzer:
    def __init__(self, df, api_key):
        """
        Initialize the analyzer with dataset and Gemini API key.
        """
        self.df = df
        self.api_key = api_key
        self.kpi_list = ['Equity Sales', 'SIP Sales Achievement', 
                         'Net Sales through MARS', 'Investment Net Sales Achievement']
        self.thresholds = [25, 50, 75, 80, 90]
        self.partner_col = 'Partner Name'
        self.year_col = 'FY_Year'
        self.target_col_suffix = 'Target'
        self.threshold_pct_change = 50
        self.max_partners = 3
        
        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel("gemini-2.0-flash")

    def _partner_concentration(self, kpi):
        df_sorted = self.df.groupby(self.partner_col)[kpi].sum().sort_values(ascending=False).reset_index()
        total_kpi = df_sorted[kpi].sum()
        df_sorted['Cumulative KPI'] = df_sorted[kpi].cumsum()
        df_sorted['Cumulative KPI %'] = df_sorted['Cumulative KPI'] / total_kpi * 100
        total_partners = len(df_sorted)
        
        conc_summary = {}
        for t in self.thresholds:
            num_partners = df_sorted[df_sorted['Cumulative KPI %'] <= t].shape[0]
            conc_summary[f'Partners for {t}%'] = num_partners
            conc_summary[f'% of Total Partners for {t}%'] = round(num_partners / total_partners * 100, 2)
        return conc_summary, total_partners

    def generate_partner_concentration_insights(self):
        insights = {}
        for kpi in self.kpi_list:
            conc_summary, total_partners = self._partner_concentration(kpi)
            text = f"\n=== Partner Concentration Insight for '{kpi}' ===\n"
            for t in self.thresholds:
                text += f" - Top {conc_summary[f'Partners for {t}%']} partners "\
                        f"({conc_summary[f'% of Total Partners for {t}%']}% of total partners) "\
                        f"contribute {t}% of total {kpi}.\n"
            pct_80_share = conc_summary.get('% of Total Partners for 80%', None)
            text += "\nDeep Insights:\n"
            if pct_80_share is not None:
                if pct_80_share <= 10:
                    text += f" - Highly concentrated: only {pct_80_share}% of partners contribute 80% of {kpi}.\n"
                elif pct_80_share <= 25:
                    text += f" - Moderately concentrated: top {pct_80_share}% of partners contribute 80% of {kpi}.\n"
                else:
                    text += f" - Low concentration: contributions are evenly distributed.\n"
            text += f" - Opportunity: Upskill mid-tier partners to reduce dependency on top {pct_80_share}%.\n"
            text += f" - Risk: Losing top performers could significantly impact {kpi}.\n"
            insights[kpi] = text
        return insights

    def generate_leaders_and_laggers_insights(self):
        insights = {}
        for kpi in self.kpi_list:
            df_kpi = self.df.groupby(self.partner_col)[kpi].sum().reset_index()
            leaders = df_kpi.sort_values(kpi, ascending=False).head(5)
            laggers = df_kpi.sort_values(kpi, ascending=True).head(5)
            text = f"\n=== Leaders and Laggers Insight for '{kpi}' ===\n"
            text += "Top 5 Leaders:\n"
            text += leaders.to_string(index=False) + "\n\n"
            text += "Bottom 5 Laggers:\n"
            text += laggers.to_string(index=False) + "\n\n"
            text += "Deep Insight:\n - Leaders drive majority of KPI. Focus on replicating their practices.\n"
            text += " - Laggers need mentoring or process improvements.\n"
            insights[kpi] = text
        return insights

    def detect_drastic_changes(self):
        df_sorted = self.df.sort_values([self.partner_col, self.year_col])
        change_records = []
        for partner, group in df_sorted.groupby(self.partner_col):
            group = group.sort_values(self.year_col)
            prev_row = None
            for _, row in group.iterrows():
                if prev_row is not None:
                    for kpi in self.kpi_list:
                        prev_val = prev_row[kpi]
                        curr_val = row[kpi]
                        if prev_val != 0:
                            pct_change = ((curr_val - prev_val) / abs(prev_val)) * 100
                        else:
                            pct_change = np.nan
                        if pd.notna(pct_change) and abs(pct_change) >= self.threshold_pct_change:
                            change_records.append({
                                'Partner Name': partner,
                                'KPI': kpi,
                                'Year From': prev_row[self.year_col],
                                'Year To': row[self.year_col],
                                'Previous Value': prev_val,
                                'Current Value': curr_val,
                                '% Change': round(pct_change, 2)
                            })
                prev_row = row
        return pd.DataFrame(change_records)

    def generate_partner_change_insights(self, change_df):
        top_partners = change_df.groupby('Partner Name')['% Change'].max().sort_values(ascending=False).head(self.max_partners).index
        insights_text = ""
        for partner in top_partners:
            partner_data = change_df[change_df['Partner Name'] == partner]
            prompt = f"""
You are a data analyst. Analyze the KPI performance shifts for partner '{partner}'.
Generate insights on improvement/decline patterns and business implications.

Data:
{partner_data.to_dict(orient='records')}
"""
            try:
                resp = self.model.generate_content(
                    prompt,
                    generation_config={"temperature": 0.7, "top_p": 0.9, "top_k": 40, "max_output_tokens": 1024}
                )
                insights_text += f"\n=== Partner: {partner} ===\n{resp.text}\n" + "-"*100 + "\n"
            except Exception as e:
                insights_text += f"Error generating insights for {partner}: {e}\n"
        return insights_text

    def identify_focus_areas(self):
        focus_records = []
        kpi_avg = self.df[self.kpi_list].mean()
        for partner, group in self.df.groupby(self.partner_col):
            partner_row = group.iloc[-1]
            gaps = {}
            for kpi in self.kpi_list:
                target_col = None
                possible_targets = [col for col in self.df.columns if self.target_col_suffix in col and kpi in col]
                if possible_targets:
                    target_col = possible_targets[0]
                    target_value = partner_row[target_col]
                else:
                    target_value = kpi_avg[kpi]
                gap_pct = ((target_value - partner_row[kpi]) / target_value) * 100 if target_value else 0
                gaps[kpi] = gap_pct
            sorted_gaps = sorted(gaps.items(), key=lambda x: x[1], reverse=True)
            focus_records.append({'Partner Name': partner, 'Focus KPIs': sorted_gaps})
        focus_df = pd.DataFrame(focus_records)
        focus_df['Max Gap'] = focus_df['Focus KPIs'].apply(lambda x: x[0][1])
        focus_df = focus_df.sort_values('Max Gap', ascending=False).head(self.max_partners)
        return focus_df

    def generate_focus_area_insights(self, focus_df):
        insights_text = ""
        for _, row in focus_df.iterrows():
            partner = row['Partner Name']
            focus_kpis = row['Focus KPIs']
            prompt = f"""
Analyze the performance gaps for partner '{partner}'.
KPIs with underperformance: {focus_kpis}.
Generate strategic recommendations to improve performance.
"""
            try:
                resp = self.model.generate_content(
                    prompt,
                    generation_config={"temperature": 0.7, "top_p": 0.9, "top_k": 40, "max_output_tokens": 1024}
                )
                insights_text += f"\n=== Partner: {partner} ===\n{resp.text}\n" + "-"*100 + "\n"
            except Exception as e:
                insights_text += f"Error generating insights for {partner}: {e}\n"
        return insights_text

    # =====================================================================
    # Run All
    # =====================================================================
    def run_all_insights(self):
        print("\n" + "="*120)
        print("🔹 Partner Concentration Insights")
        conc = self.generate_partner_concentration_insights()
        for k, v in conc.items():
            print(v)

        print("\n" + "="*120)
        print("🔹 Leaders & Laggers Insights")
        ll = self.generate_leaders_and_laggers_insights()
        for k, v in ll.items():
            print(v)

        print("\n" + "="*120)
        print("🔹 Partners Changing Course Insights")
        change_df = self.detect_drastic_changes()
        change_insights = self.generate_partner_change_insights(change_df)
        print(change_insights)

        print("\n" + "="*120)
        print("🔹 Focus Area Insights")
        focus_df = self.identify_focus_areas()
        focus_insights = self.generate_focus_area_insights(focus_df)
        print(focus_insights)


In [105]:
analyzer = PartnerPerformanceAnalyzer(merged_df, google_api_key)
analyzer.run_all_insights()



🔹 Partner Concentration Insights

=== Partner Concentration Insight for 'Equity Sales' ===
 - Top 264 partners (0.68% of total partners) contribute 25% of total Equity Sales.
 - Top 1044 partners (2.69% of total partners) contribute 50% of total Equity Sales.
 - Top 2804 partners (7.22% of total partners) contribute 75% of total Equity Sales.
 - Top 3391 partners (8.73% of total partners) contribute 80% of total Equity Sales.
 - Top 5052 partners (13.01% of total partners) contribute 90% of total Equity Sales.

Deep Insights:
 - Highly concentrated: only 8.73% of partners contribute 80% of Equity Sales.
 - Opportunity: Upskill mid-tier partners to reduce dependency on top 8.73%.
 - Risk: Losing top performers could significantly impact Equity Sales.


=== Partner Concentration Insight for 'SIP Sales Achievement' ===
 - Top 400 partners (1.03% of total partners) contribute 25% of total SIP Sales Achievement.
 - Top 1481 partners (3.81% of total partners) contribute 50% of total SIP Sal

In [106]:
df1 = pd.read_excel('FY24 - Partner MIS Data.xlsx')

In [109]:
df1.head()

Unnamed: 0,ZM,SRM,RM,BM,Broker Code,Partner Name,Category,Doer Name,Doer Type,Center,Age in NJ,Age in NJ Target,Total AUM FY 23-24 Q4 YTD,Equity+ Hyb AUM FY 23-24 Q4 YTD,LIVE SIP FY 23-24 Q4 YTD,Total Net Sales\nFY 23-24 YTD,Equity Net Sales\nFY 23-24 YTD,Net SIP\nFY 23-24 YTD,MARS AUM FY 23-24 YTD,MARS Net Sales FY 23-24 YTD,PMS AUM FY 23-24 YTD,Net Sales FY 23-24 YTD,Clients Acquired FY 23-24 YTD,Live Accounts FY 23-24 YTD,Saturday School (YTD) FY 23-24 Q4 YTD,Investment\nSaturday School (YTD) FY 23-24 Q4 YTD,Insurance\nSaturday School (YTD) FY 23-24 Q4 YTD,Total Group FY 23-24 Q4 YTD,Group Covered FY 23-24 Q4 YTD,% Covered,Non-NJ AUM\nFY 23-24 Q4 YTD \n(in Cr),Total Reviews\nFY 23-24 Q4 YTD,Amount FY 23-24 Q4 YTD\n(in Cr),Flexicap Target\nFY 23-24 Q4 YTD\n(in Cr),Flexicap Ach\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Target\nFY 23-24 Q4 YTD\n(in Cr),AMC NS Ach\nFY 23-24 Q4 YTD\n(in Cr),Target Qty (FY 23-24 Q4 YTD)\n(in Cr),Order Qty\n(FY 23-24 Q4 YTD)\n(in Cr),Equity Net Sales% Achievement,SIP Sales% Achievement,Client Acquisition% Achievement,Insurance% Achievement,LAS% Achievement,Total % Achievement,Status
0,KOUSHIK GHOSH,,KAMLESH KUMAR,,33054,BIPLAB ROY,NON D,Santanu Deb,Fundz Express,AGARTALA,3,3,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,0,,,,,,,,,0,,,,,,,0.0,,0.0,12.5,,2.5,MUTUAL FUND-Active
1,KOUSHIK GHOSH,,KAMLESH KUMAR,,88928,Santanu Deb,NON D,Santanu Deb,Fundz Express,AGARTALA,55,8,7090659.76,7090659.76,228989.7,1935230.657,1935230.666,95995.23,1999435,1050000,0,0,16,35,,,,,,,,,0,,,,,,,54.58,0.0,119.85,64.58,0.0,70.7,MUTUAL FUND-Active
2,KOUSHIK GHOSH,,KAMLESH KUMAR,,33434,TAPAS RUDRA PAL,NON D,Santanu Deb,Fundz Express,AGARTALA,3,3,2114389.27,2076409.88,108495.24,2082466.446,2045363.633,108495.24,0,0,0,0,30,27,,,,,,,,,0,,,,,,,233.37,,350.68,362.5,0.0,275.01,MUTUAL FUND-Active
3,PRASHANT ANANTRAI KAKKAD,BHAVESH B. JOSHI,ABHAY KUMAR VISHWAKARMA,AALEY NABI,26185,AMIT GANGWAR,NON D,SAURABH AGRAWAL,UNIT MANAGER,AGRA,13,13,885945.96,885945.96,67096.73,567009.9,567009.902,60096.98,0,0,0,0,18,23,,,,,,,,,0,,,,,,,19.75,0.0,58.35,62.5,0.0,30.83,MUTUAL FUND-Active
4,PRASHANT ANANTRAI KAKKAD,BHAVESH B. JOSHI,ABHAY KUMAR VISHWAKARMA,AALEY NABI,21754,ANJANA VARSHNEY,NON D,SAURABH AGRAWAL,UNIT MANAGER,AGRA,19,13,325872.43,325872.43,9999.5,189993.304,189993.304,-8999.55,0,0,0,0,0,2,,,,,,,,,0,,,,,,,6.98,,-9.8,4.17,0.0,-1.22,MUTUAL FUND-Active


In [108]:
df1.columns = df1.iloc[0]
df1 = df1.drop(df1.index[0])
df1 = df1.reset_index(drop=True)
#print(fy24_MIS.head())

In [111]:
df2 = pd.read_excel("FY25 - Partner MIS Data.xlsx")

In [112]:
df2.columns = df2.iloc[0]
df2 = df2.drop(df2.index[0])
df2 = df2.reset_index(drop=True)
#print(fy25_mis.head())

In [113]:
df1['FY_Year'] = '2024'
df2['FY_Year'] = '2025'

In [114]:
MIS_total = pd.concat([df1, df2], axis=0)

In [115]:
MIS_total['RM'].nunique()

31

In [116]:
MIS_total['BM'].nunique()

145

In [117]:
MIS_total['ZM'].nunique()

9