# Import

In [1]:
import pandas as pd
from graphviz import Digraph, Graph

# File path & Load data

In [2]:
# File paths
leads_file = "Data Kaggle\leads_basic_details.csv"
managers_file = "Data Kaggle\sales_managers_assigned_leads_details.csv"
no_interest_file = "Data Kaggle\leads_reasons_for_no_interest.csv"
interactions_file = "Data Kaggle\leads_interaction_details.csv"
demo_file = "Data Kaggle\leads_demo_watched_details.csv"

In [3]:
# Load CSV data
leads_df = pd.read_csv(leads_file)
managers_df = pd.read_csv(managers_file)
no_interest_df = pd.read_csv(no_interest_file)
interactions_df = pd.read_csv(interactions_file)
demo_df = pd.read_csv(demo_file)

In [4]:
# Display first few rows of each file
leads_df.head()

Unnamed: 0,lead_id,age,gender,current_city,current_education,parent_occupation,lead_gen_source
0,USR1001,16,FEMALE,Hyderabad,Intermediate,Private Employee,social_media
1,USR1002,20,MALE,Bengaluru,B.Tech,Business,user_referrals
2,USR1003,20,FEMALE,Visakhapatnam,B.Tech,Lawyer,user_referrals
3,USR1004,16,MALE,Mumbai,Intermediate,IT Employee,user_referrals
4,USR1005,16,MALE,Chennai,Intermediate,Government Employee,user_referrals


In [5]:
managers_df.head()

Unnamed: 0,snr_sm_id,jnr_sm_id,assigned_date,cycle,lead_id
0,SNR501MG,JNR1001MG,1/1/2022,1,USR1001
1,SNR501MG,JNR1001MG,1/1/2022,1,USR1002
2,SNR501MG,JNR1001MG,1/1/2022,1,USR1003
3,SNR501MG,JNR1001MG,1/1/2022,1,USR1004
4,SNR501MG,JNR1001MG,1/1/2022,1,USR1005


In [55]:
no_interest_df.head()

Unnamed: 0,lead_id,reasons_for_not_interested_in_demo,reasons_for_not_interested_to_consider,reasons_for_not_interested_to_convert
0,USR1001,No time for student,,
1,USR1003,,No time for student,
2,USR1004,,Wants offline classes,
3,USR1005,,Can't afford,
4,USR1006,,Student not interested in domain,


In [4]:
no_interest_df["reasons_for_not_interested_in_demo"] = no_interest_df["reasons_for_not_interested_in_demo"].replace("Can't afford", "Cannot afford")

In [56]:
interactions_df.head()

Unnamed: 0,jnr_sm_id,lead_id,lead_stage,call_done_date,call_status,call_reason
0,JNR1001MG,USR1001,lead,2022-01-02,successful,lead_introduction
1,JNR1001MG,USR1001,lead,2022-01-02,successful,demo_schedule
2,JNR1001MG,USR1002,lead,2022-01-03,successful,lead_introduction
3,JNR1001MG,USR1002,lead,2022-01-04,successful,demo_schedule
4,JNR1001MG,USR1002,awareness,2022-01-05,successful,post_demo_followup


In [57]:
demo_df.head()

Unnamed: 0,lead_id,demo_watched_date,language,watched_percentage
0,USR1002,1/4/2022,Telugu,42
1,USR1003,1/7/2022,Telugu,81
2,USR1004,1/2/2022,Telugu,35
3,USR1005,1/3/2022,Hindi,38
4,USR1006,1/12/2022,Hindi,54


# Test Graph

In [5]:
from graphviz import Digraph

# Create a Digraph object
dot = Digraph("Customer_Acquisition", format="png")

# Set graph direction from Left to Right
dot.attr(rankdir="LR")

# Define Node Styles
dot.attr("node", shape="box", style="filled", fontname="Arial", fontsize="12")

# Stages in the Flow
dot.node("Lead", "Lead", fillcolor="deepskyblue", fontcolor="white")
dot.node("Awareness", "Awareness\n(Demo Video)", fillcolor="deepskyblue", fontcolor="white")
dot.node("Consideration", "Consideration", fillcolor="deepskyblue", fontcolor="white")
dot.node("Conversion", "Conversion", fillcolor="deepskyblue", fontcolor="white")

# Decision Points
dot.node("Decision1", "Lead’s Interest?", shape="diamond", fillcolor="white", fontcolor="black")
dot.node("Decision2", "Lead’s Interest?", shape="diamond", fillcolor="white", fontcolor="black")
dot.node("Decision3", "Lead’s Interest?", shape="diamond", fillcolor="white", fontcolor="black")

# Drop Points
dot.node("Drop1", "Drop", fillcolor="orangered", fontcolor="white")
dot.node("Drop2", "Drop", fillcolor="orangered", fontcolor="white")
dot.node("Drop3", "Drop", fillcolor="orangered", fontcolor="white")

# Edges (Connections)
dot.edge("Lead", "Decision1", label="Introduction/\nDemo Schedule Call")
dot.edge("Decision1", "Drop1", label="No")
dot.edge("Decision1", "Awareness", label="Yes")

dot.edge("Awareness", "Decision2", label="Post Demo -\nFollow-up Call")
dot.edge("Decision2", "Drop2", label="No")
dot.edge("Decision2", "Consideration", label="Yes")

dot.edge("Consideration", "Decision3", label="Call to Know\nInterest for Conversion")
dot.edge("Decision3", "Drop3", label="No")
dot.edge("Decision3", "Conversion", label="Yes - Payment/\nSuccessful Conversion")

# Additional Follow-up Paths (Dotted Lines)
dot.edge("Awareness", "Lead", label="Did Not Attend\nDemo", style="dashed")
dot.edge("Consideration", "Awareness", label="Follow-up Call\nfor Consideration", style="dashed")
dot.edge("Conversion", "Consideration", label="Follow-up Call for\nConversion/Failed Payment", style="dashed")

# Render and Save the Graph
dot.render("C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/ca_flow", format="png", view=True)


'C:\\Users\\029at\\Desktop\\Big Data\\Seminar\\Data Kaggle\\data\\ca_flow.png'

In [6]:
import pandas as pd
from graphviz import Digraph

def create_customer_acquisition_flow(interactions_df, no_interest_df, demo_df):
    dot = Digraph(format='png')
    dot.attr(rankdir='LR', size='16', dpi='300')

    # Compute lead counts at each stage
    lead_stage_counts = interactions_df.groupby("lead_stage")["lead_id"].nunique()

    # Call success rate per stage
    call_success_rate = interactions_df.groupby("lead_stage")["call_status"].apply(
        lambda x: (x == "successful").sum() / len(x) * 100
    ).fillna(0)  # Handle NaN

    # Drop-off counts calculated dynamically (Leads lost at each stage)
    total_leads = lead_stage_counts.get('lead', 0)
    awareness_leads = lead_stage_counts.get('awareness', 0)
    consideration_leads = lead_stage_counts.get('consideration', 0)
    conversion_leads = lead_stage_counts.get('conversion', 0)

    drop_awareness = total_leads - awareness_leads
    drop_consideration = awareness_leads - consideration_leads
    drop_conversion = consideration_leads - conversion_leads

    # Define Nodes dynamically
    dot.node("Lead", f"Lead\n({total_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Awareness", f"Awareness (Demo Video)\n({awareness_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Consideration", f"Consideration\n({consideration_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Conversion", f"Conversion\n({conversion_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Drop", "Drop", shape="box", style="filled", fillcolor="orangered")

    # Define Decision Points
    dot.node("Interest1", "Lead’s Interest", shape="diamond")
    dot.node("Interest2", "Lead’s Interest", shape="diamond")
    dot.node("Interest3", "Lead’s Interest", shape="diamond")

    # Define Edges with dynamic insights
    dot.edge("Lead", "Interest1", f"Introduction/Demo Call\n(Success Rate: {call_success_rate.get('lead', 0):.1f}%)")
    dot.edge("Interest1", "Drop", f"No ({drop_awareness} dropped at demo stage)")
    dot.edge("Interest1", "Awareness", "Yes")

    dot.edge("Awareness", "Interest2", f"Post Demo Follow-up\n(Success Rate: {call_success_rate.get('awareness', 0):.1f}%)")
    dot.edge("Interest2", "Drop", f"No ({drop_consideration} dropped at consideration stage)")
    dot.edge("Interest2", "Consideration", "Yes")

    dot.edge("Consideration", "Interest3", f"Conversion Follow-up\n(Success Rate: {call_success_rate.get('consideration', 0):.1f}%)")
    dot.edge("Interest3", "Drop", f"No ({drop_conversion} dropped at conversion stage)")
    dot.edge("Interest3", "Conversion", "Yes - Payment Successful")

    return dot

# Example usage
dynamic_graph = create_customer_acquisition_flow(interactions_df, no_interest_df, demo_df)
dynamic_graph.render('C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/ca_flow_fixed', format='png', view=True)


'C:\\Users\\029at\\Desktop\\Big Data\\Seminar\\Data Kaggle\\data\\ca_flow_fixed.png'

In [7]:
import pandas as pd
from graphviz import Digraph

def create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df, managers_df):
    dot = Digraph(format='png')
    dot.attr(rankdir='LR', size='16', dpi='300')

    # --- Compute Lead Counts at Each Stage ---
    lead_stage_counts = interactions_df.groupby("lead_stage")["lead_id"].nunique()

    # --- Compute Call Success Rate per Stage ---
    call_success_rate = interactions_df.groupby("lead_stage")["call_status"].apply(
        lambda x: (x == "successful").sum() / len(x) * 100
    ).fillna(0)

    # --- Compute Drop-Offs Dynamically ---
    total_leads = lead_stage_counts.get('lead', 0)
    awareness_leads = lead_stage_counts.get('awareness', 0)
    consideration_leads = lead_stage_counts.get('consideration', 0)
    conversion_leads = lead_stage_counts.get('conversion', 0)

    drop_awareness = total_leads - awareness_leads
    drop_consideration = awareness_leads - consideration_leads
    drop_conversion = consideration_leads - conversion_leads

    # --- Compute Demo Insights ---
    demo_watched_count = demo_df["lead_id"].nunique()
    demo_attendance_rate = (demo_watched_count / awareness_leads) * 100 if awareness_leads > 0 else 0
    avg_watch_percentage = demo_df["watched_percentage"].mean()
    top_demo_language = demo_df["language"].mode()[0] if not demo_df.empty else "N/A"

    # --- Compute Lead Source Analysis ---
    lead_sources = leads_df["lead_gen_source"].value_counts()
    top_source = lead_sources.idxmax() if not lead_sources.empty else "N/A"

    # --- Sales Manager Insights ---
    avg_cycles_per_conversion = managers_df.groupby("jnr_sm_id")["cycle"].nunique().mean()
    
    # --- Define Nodes with Additional Insights ---
    dot.node("Lead", f"Lead\n({total_leads} leads)\nTop Source: {top_source}", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Awareness", f"Awareness (Demo Video)\n({awareness_leads} leads)\nDemo Attendance: {demo_attendance_rate:.1f}%\nAvg Watch: {avg_watch_percentage:.1f}%\nTop Language: {top_demo_language}", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Consideration", f"Consideration\n({consideration_leads} leads)\nAvg Cycles: {avg_cycles_per_conversion:.1f}", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Conversion", f"Conversion\n({conversion_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Drop", "Drop", shape="box", style="filled", fillcolor="orangered")

    # --- Define Decision Points ---
    dot.node("Interest1", "Lead’s Interest", shape="diamond")
    dot.node("Interest2", "Lead’s Interest", shape="diamond")
    dot.node("Interest3", "Lead’s Interest", shape="diamond")

    # --- Define Edges with Dynamic Insights ---
    dot.edge("Lead", "Interest1", f"Introduction/Demo Call\n(Success Rate: {call_success_rate.get('lead', 0):.1f}%)")
    dot.edge("Interest1", "Drop", f"No ({drop_awareness} dropped at demo stage)")
    dot.edge("Interest1", "Awareness", "Yes")

    dot.edge("Awareness", "Interest2", f"Post Demo Follow-up\n(Success Rate: {call_success_rate.get('awareness', 0):.1f}%)")
    dot.edge("Interest2", "Drop", f"No ({drop_consideration} dropped at consideration stage)")
    dot.edge("Interest2", "Consideration", "Yes")

    dot.edge("Consideration", "Interest3", f"Conversion Follow-up\n(Success Rate: {call_success_rate.get('consideration', 0):.1f}%)")
    dot.edge("Interest3", "Drop", f"No ({drop_conversion} dropped at conversion stage)")
    dot.edge("Interest3", "Conversion", "Yes - Payment Successful")

    return dot

# Example usage
dynamic_graph = create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df, managers_df)
dynamic_graph.render('C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/ca_flow_enhanced', format='png', view=True)


'C:\\Users\\029at\\Desktop\\Big Data\\Seminar\\Data Kaggle\\data\\ca_flow_enhanced.png'

In [8]:
import pandas as pd
from graphviz import Digraph

def create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df, managers_df):
    dot = Digraph(format='png')
    dot.attr(rankdir='LR', size='16', dpi='300')

    # --- Compute Lead Counts at Each Stage ---
    lead_stage_counts = interactions_df.groupby("lead_stage")["lead_id"].nunique()

    # --- Compute Call Success Rate per Stage ---
    call_success_rate = interactions_df.groupby("lead_stage")["call_status"].apply(
        lambda x: (x == "successful").sum() / len(x) * 100
    ).fillna(0)

    # --- Compute Drop-Offs Dynamically ---
    total_leads = lead_stage_counts.get('lead', 0)
    awareness_leads = lead_stage_counts.get('awareness', 0)
    consideration_leads = lead_stage_counts.get('consideration', 0)
    conversion_leads = lead_stage_counts.get('conversion', 0)

    drop_awareness = total_leads - awareness_leads
    drop_consideration = awareness_leads - consideration_leads
    drop_conversion = consideration_leads - conversion_leads

    # --- Extract Top No-Interest Reasons ---
    def get_top_reason(column):
        """Extract the most common drop-off reason from a specific column."""
        return no_interest_df[column].value_counts().idxmax() if column in no_interest_df and not no_interest_df[column].isna().all() else "N/A"

    reason_no_demo = get_top_reason("reasons_for_not_interested_in_demo")
    reason_no_consideration = get_top_reason("reasons_for_not_interested_in_consideration")
    reason_no_conversion = get_top_reason("reasons_for_not_interested_in_conversion")

    # --- Compute Demo Insights ---
    demo_watched_count = demo_df["lead_id"].nunique()
    demo_attendance_rate = (demo_watched_count / awareness_leads) * 100 if awareness_leads > 0 else 0
    avg_watch_percentage = demo_df["watched_percentage"].mean()
    top_demo_language = demo_df["language"].mode()[0] if not demo_df.empty else "N/A"

    # --- Compute Lead Source Analysis ---
    lead_sources = leads_df["lead_gen_source"].value_counts()
    top_source = lead_sources.idxmax() if not lead_sources.empty else "N/A"

    # --- Sales Manager Insights ---
    avg_cycles_per_conversion = managers_df.groupby("jnr_sm_id")["cycle"].nunique().mean()
    
    # --- Define Nodes with Additional Insights ---
    dot.node("Lead", f"Lead\n({total_leads} leads)\nTop Source: {top_source}", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Awareness", f"Awareness (Demo Video)\n({awareness_leads} leads)\nDemo Attendance: {demo_attendance_rate:.1f}%\nAvg Watch: {avg_watch_percentage:.1f}%\nTop Language: {top_demo_language}", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Consideration", f"Consideration\n({consideration_leads} leads)\nAvg Cycles: {avg_cycles_per_conversion:.1f}", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Conversion", f"Conversion\n({conversion_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")

    # --- Drop-Off Nodes with Reasons ---
    dot.node("Drop_Demo", f"Dropped (No Demo)\n({drop_awareness} leads)\nReason: {reason_no_demo}", shape="box", style="filled", fillcolor="orangered")
    dot.node("Drop_Consideration", f"Dropped (No Consideration)\n({drop_consideration} leads)\nReason: {reason_no_consideration}", shape="box", style="filled", fillcolor="orangered")
    dot.node("Drop_Conversion", f"Dropped (No Conversion)\n({drop_conversion} leads)\nReason: {reason_no_conversion}", shape="box", style="filled", fillcolor="orangered")

    # --- Define Decision Points ---
    dot.node("Interest1", "Lead’s Interest", shape="diamond")
    dot.node("Interest2", "Lead’s Interest", shape="diamond")
    dot.node("Interest3", "Lead’s Interest", shape="diamond")

    # --- Define Edges with Dynamic Insights ---
    dot.edge("Lead", "Interest1", f"Introduction/Demo Call\n(Success Rate: {call_success_rate.get('lead', 0):.1f}%)")
    dot.edge("Interest1", "Drop_Demo", f"No ({drop_awareness} dropped)")
    dot.edge("Interest1", "Awareness", "Yes")

    dot.edge("Awareness", "Interest2", f"Post Demo Follow-up\n(Success Rate: {call_success_rate.get('awareness', 0):.1f}%)")
    dot.edge("Interest2", "Drop_Consideration", f"No ({drop_consideration} dropped)")
    dot.edge("Interest2", "Consideration", "Yes")

    dot.edge("Consideration", "Interest3", f"Conversion Follow-up\n(Success Rate: {call_success_rate.get('consideration', 0):.1f}%)")
    dot.edge("Interest3", "Drop_Conversion", f"No ({drop_conversion} dropped)")
    dot.edge("Interest3", "Conversion", "Yes - Payment Successful")

    return dot

# Example usage
dynamic_graph = create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df, managers_df)
dynamic_graph.render('C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/ca_flow_no_interest', format='png', view=True)


'C:\\Users\\029at\\Desktop\\Big Data\\Seminar\\Data Kaggle\\data\\ca_flow_no_interest.png'

In [9]:
import pandas as pd
from graphviz import Digraph

def create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df, managers_df):
    dot = Digraph(format='png')
    dot.attr(rankdir='LR', size='16', dpi='300')

    # --- Compute Lead Counts at Each Stage ---
    lead_stage_counts = interactions_df.groupby("lead_stage")["lead_id"].nunique()

    # --- Compute Call Success Rate per Stage ---
    call_success_rate = interactions_df.groupby("lead_stage")["call_status"].apply(
        lambda x: (x == "successful").sum() / len(x) * 100
    ).fillna(0)

    # --- Compute Drop-Offs ---
    total_leads = lead_stage_counts.get('lead', 0)
    awareness_leads = lead_stage_counts.get('awareness', 0)
    consideration_leads = lead_stage_counts.get('consideration', 0)
    conversion_leads = lead_stage_counts.get('conversion', 0)

    drop_awareness = total_leads - awareness_leads
    drop_consideration = awareness_leads - consideration_leads
    drop_conversion = consideration_leads - conversion_leads

    # --- Extract All No-Interest Reasons with Counts & Percentages ---
    def get_reason_counts(column, total_drops, limit=3):
        """Get reasons with drop counts and percentages."""
        if column in no_interest_df and not no_interest_df[column].isna().all():
            reasons = no_interest_df[column].dropna().value_counts()
            reason_texts = []
            for reason, count in reasons.items():
                percentage = (count / total_drops) * 100 if total_drops > 0 else 0
                reason_texts.append(f"- {reason} ({count} leads, {percentage:.1f}%)")
                if len(reason_texts) >= limit:
                    break  # Limit reasons to avoid clutter
            return "\n".join(reason_texts) if reason_texts else "- No data"
        return "- No data"

    reasons_no_demo = get_reason_counts("reasons_for_not_interested_in_demo", drop_awareness)
    reasons_no_consideration = get_reason_counts("reasons_for_not_interested_to_consider", drop_consideration)
    reasons_no_conversion = get_reason_counts("reasons_for_not_interested_to_convert", drop_conversion)

    # --- Compute Demo Insights ---
    demo_watched_count = demo_df["lead_id"].nunique()
    demo_attendance_rate = (demo_watched_count / awareness_leads) * 100 if awareness_leads > 0 else 0
    avg_watch_percentage = demo_df["watched_percentage"].mean()
    top_demo_language = demo_df["language"].mode()[0] if not demo_df.empty else "N/A"

    # --- Compute Lead Source Analysis ---
    lead_sources = leads_df["lead_gen_source"].value_counts()
    top_source = lead_sources.idxmax() if not lead_sources.empty else "N/A"

    # --- Sales Manager Insights ---
    avg_cycles_per_conversion = managers_df.groupby("jnr_sm_id")["cycle"].nunique().mean()
    
    # --- Define Nodes with Additional Insights ---
    dot.node("Lead", f"Lead\n({total_leads} leads)\nTop Source: {top_source}", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Awareness", f"Awareness (Demo Video)\n({awareness_leads} leads)\nDemo Attendance: {demo_attendance_rate:.1f}%\nAvg Watch: {avg_watch_percentage:.1f}%\nTop Language: {top_demo_language}", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Consideration", f"Consideration\n({consideration_leads} leads)\nAvg Cycles: {avg_cycles_per_conversion:.1f}", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Conversion", f"Conversion\n({conversion_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")

    # --- Drop-Off Nodes with Reasons & Percentages ---
    dot.node("Drop_Demo", f"Dropped (No Demo)\n({drop_awareness} leads)\n{reasons_no_demo}", shape="box", style="filled", fillcolor="orangered")
    dot.node("Drop_Consideration", f"Dropped (No Consideration)\n({drop_consideration} leads)\n{reasons_no_consideration}", shape="box", style="filled", fillcolor="orangered")
    dot.node("Drop_Conversion", f"Dropped (No Conversion)\n({drop_conversion} leads)\n{reasons_no_conversion}", shape="box", style="filled", fillcolor="orangered")

    # --- Define Decision Points ---
    dot.node("Interest1", "Lead’s Interest", shape="diamond")
    dot.node("Interest2", "Lead’s Interest", shape="diamond")
    dot.node("Interest3", "Lead’s Interest", shape="diamond")

    # --- Define Edges with Dynamic Insights ---
    dot.edge("Lead", "Interest1", f"Introduction/Demo Call\n(Success Rate: {call_success_rate.get('lead', 0):.1f}%)")
    dot.edge("Interest1", "Drop_Demo", f"No ({drop_awareness} dropped)")
    dot.edge("Interest1", "Awareness", "Yes")

    dot.edge("Awareness", "Interest2", f"Post Demo Follow-up\n(Success Rate: {call_success_rate.get('awareness', 0):.1f}%)")
    dot.edge("Interest2", "Drop_Consideration", f"No ({drop_consideration} dropped)")
    dot.edge("Interest2", "Consideration", "Yes")

    dot.edge("Consideration", "Interest3", f"Conversion Follow-up\n(Success Rate: {call_success_rate.get('consideration', 0):.1f}%)")
    dot.edge("Interest3", "Drop_Conversion", f"No ({drop_conversion} dropped)")
    dot.edge("Interest3", "Conversion", "Yes - Payment Successful")

    return dot

# Example usage
dynamic_graph = create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df, managers_df)
dynamic_graph.render('C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/ca_flow_reasons_counts', format='png', view=True)


'C:\\Users\\029at\\Desktop\\Big Data\\Seminar\\Data Kaggle\\data\\ca_flow_reasons_counts.png'

In [10]:
import pandas as pd
from graphviz import Digraph

def create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df, managers_df):
    dot = Digraph(format='png')
    dot.attr(rankdir='LR', size='16', dpi='300')

    # --- Compute Lead Counts at Each Stage ---
    lead_stage_counts = interactions_df.groupby("lead_stage")["lead_id"].nunique()

    # --- Compute Call Success Rate per Stage ---
    call_success_rate = interactions_df.groupby("lead_stage")["call_status"].apply(
        lambda x: (x == "successful").sum() / len(x) * 100
    ).fillna(0)

    # --- Compute Drop-Offs ---
    total_leads = lead_stage_counts.get('lead', 0)
    awareness_leads = lead_stage_counts.get('awareness', 0)
    consideration_leads = lead_stage_counts.get('consideration', 0)
    conversion_leads = lead_stage_counts.get('conversion', 0)

    drop_awareness = total_leads - awareness_leads
    drop_consideration = awareness_leads - consideration_leads
    drop_conversion = consideration_leads - conversion_leads

    # --- Extract Drop Reasons from `no_interest_df` ---
    def get_reason_counts(column, total_drops):
        """Extracts and formats drop-off reasons with counts & percentages."""
        if column in no_interest_df.columns and not no_interest_df[column].isna().all():
            reasons = no_interest_df[column].dropna().value_counts()
            reason_texts = [
                f"- {reason} ({count} leads, {count / total_drops * 100:.1f}%)"
                for reason, count in reasons.items()
            ]
            return "\n".join(reason_texts) if reason_texts else "- No reasons provided"
        return "- No reasons provided"

    reasons_no_demo = get_reason_counts("reasons_for_not_interested_in_demo", drop_awareness)
    reasons_no_consideration = get_reason_counts("reasons_for_not_interested_to_consider", drop_consideration)
    reasons_no_conversion = get_reason_counts("reasons_for_not_interested_to_convert", drop_conversion)

    # --- Define Drop-Off Nodes with Correct Reasons ---
    dot.node("Drop_Demo", f"Dropped (No Demo)\n({drop_awareness} leads)\n{reasons_no_demo}", shape="box", style="filled", fillcolor="orangered")
    dot.node("Drop_Consideration", f"Dropped (No Consideration)\n({drop_consideration} leads)\n{reasons_no_consideration}", shape="box", style="filled", fillcolor="orangered")
    dot.node("Drop_Conversion", f"Dropped (No Conversion)\n({drop_conversion} leads)\n{reasons_no_conversion}", shape="box", style="filled", fillcolor="orangered")

    # --- Define Other Nodes & Edges ---
    dot.node("Lead", f"Lead\n({total_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Awareness", f"Awareness (Demo Video)\n({awareness_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Consideration", f"Consideration\n({consideration_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Conversion", f"Conversion\n({conversion_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")

    dot.node("Interest1", "Lead’s Interest", shape="diamond")
    dot.node("Interest2", "Lead’s Interest", shape="diamond")
    dot.node("Interest3", "Lead’s Interest", shape="diamond")

    dot.edge("Lead", "Interest1", f"Introduction/Demo Call\n(Success Rate: {call_success_rate.get('lead', 0):.1f}%)")
    dot.edge("Interest1", "Drop_Demo", f"No ({drop_awareness} dropped)")
    dot.edge("Interest1", "Awareness", "Yes")

    dot.edge("Awareness", "Interest2", f"Post Demo Follow-up\n(Success Rate: {call_success_rate.get('awareness', 0):.1f}%)")
    dot.edge("Interest2", "Drop_Consideration", f"No ({drop_consideration} dropped)")
    dot.edge("Interest2", "Consideration", "Yes")

    dot.edge("Consideration", "Interest3", f"Conversion Follow-up\n(Success Rate: {call_success_rate.get('consideration', 0):.1f}%)")
    dot.edge("Interest3", "Drop_Conversion", f"No ({drop_conversion} dropped)")
    dot.edge("Interest3", "Conversion", "Yes - Payment Successful")

    return dot

# Example usage
dynamic_graph = create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df, managers_df)
dynamic_graph.render('C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/ca_flow_fixed_reasons', format='png', view=True)


'C:\\Users\\029at\\Desktop\\Big Data\\Seminar\\Data Kaggle\\data\\ca_flow_fixed_reasons.png'

In [11]:
import pandas as pd
from graphviz import Digraph

def create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df, managers_df):
    dot = Digraph(format='png')
    dot.attr(rankdir='LR', size='16', dpi='300')

    # Compute Lead Counts at Each Stage
    lead_stage_counts = interactions_df.groupby("lead_stage")["lead_id"].nunique()

    # Compute Call Success Rate per Stage
    call_success_rate = interactions_df.groupby("lead_stage")["call_status"].apply(
        lambda x: (x == "successful").sum() / len(x) * 100
    ).fillna(0)

    # Compute Drop-Offs
    total_leads = lead_stage_counts.get('lead', 0)
    awareness_leads = lead_stage_counts.get('awareness', 0)
    consideration_leads = lead_stage_counts.get('consideration', 0)
    conversion_leads = lead_stage_counts.get('conversion', 0)

    drop_awareness = total_leads - awareness_leads
    drop_consideration = awareness_leads - consideration_leads
    drop_conversion = consideration_leads - conversion_leads

    # Extract Drop Reasons and Create Nodes
    def extract_reasons(column, total_drops, stage):
        if column in no_interest_df.columns and not no_interest_df[column].isna().all():
            reasons = no_interest_df[column].dropna().value_counts()
            reason_nodes = {}
            for reason, count in reasons.items():
                if reason not in reason_nodes:
                    reason_nodes[reason] = f"Reason_{len(reason_nodes) + 1}"
                    dot.node(reason_nodes[reason], reason, shape="oval", style="filled", fillcolor="lightgray")
                dot.edge(stage, reason_nodes[reason], f"{count} leads ({count / total_drops * 100:.1f}%)", style="dashed")
    
    extract_reasons("reasons_for_not_interested_in_demo", drop_awareness, "Interest1")
    extract_reasons("reasons_for_not_interested_to_consider", drop_consideration, "Interest2")
    extract_reasons("reasons_for_not_interested_to_convert", drop_conversion, "Interest3")
    
    # Define Other Nodes
    dot.node("Lead", f"Lead\n({total_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Awareness", f"Awareness (Demo Video)\n({awareness_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Consideration", f"Consideration\n({consideration_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Conversion", f"Conversion\n({conversion_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")

    dot.node("Interest1", "Lead’s Interest", shape="diamond")
    dot.node("Interest2", "Lead’s Interest", shape="diamond")
    dot.node("Interest3", "Lead’s Interest", shape="diamond")

    # Define Edges
    dot.edge("Lead", "Interest1", f"Introduction/Demo Call\n(Success Rate: {call_success_rate.get('lead', 0):.1f}%)")
    dot.edge("Interest1", "Awareness", "Yes")
    dot.edge("Awareness", "Interest2", "Post Demo - Follow up Call")
    dot.edge("Interest2", "Consideration", "Yes")
    dot.edge("Consideration", "Interest3", "Call for Conversion")
    dot.edge("Interest3", "Conversion", "Yes - Payment Successful")

    return dot

# Example usage
dynamic_graph = create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df, managers_df)
dynamic_graph.render('C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/ca_flow_updated_3', format='png', view=True)

'C:\\Users\\029at\\Desktop\\Big Data\\Seminar\\Data Kaggle\\data\\ca_flow_updated_3.png'

In [12]:
import pandas as pd
from graphviz import Digraph

def create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df):
    dot = Digraph(format='png')
    dot.attr(rankdir='LR', size='16', dpi='300')

    # --- Compute Lead Counts at Each Stage ---
    lead_stage_counts = interactions_df.groupby("lead_stage")["lead_id"].nunique()

    # --- Compute Drop-Offs ---
    total_leads = lead_stage_counts.get('lead', 0)
    awareness_leads = lead_stage_counts.get('awareness', 0)
    consideration_leads = lead_stage_counts.get('consideration', 0)
    conversion_leads = lead_stage_counts.get('conversion', 0)

    drop_awareness = total_leads - awareness_leads
    drop_consideration = awareness_leads - consideration_leads
    drop_conversion = consideration_leads - conversion_leads

    # --- Extract Drop-Off Reasons ---
    def get_reason_counts(column, total_drops, limit=3):
        if column in no_interest_df and not no_interest_df[column].isna().all():
            reasons = no_interest_df[column].dropna().value_counts()
            reason_texts = []
            for reason, count in reasons.items():
                percentage = (count / total_drops) * 100 if total_drops > 0 else 0
                reason_texts.append(f"- {reason} ({count} leads, {percentage:.1f}%)")
                if len(reason_texts) >= limit:
                    break
            return "\n".join(reason_texts) if reason_texts else "- No data"
        return "- No data"

    reasons_no_demo = get_reason_counts("reasons_for_not_interested_in_demo", drop_awareness)
    reasons_no_consideration = get_reason_counts("reasons_for_not_interested_to_consider", drop_consideration)
    reasons_no_conversion = get_reason_counts("reasons_for_not_interested_to_convert", drop_conversion)

    # --- Compute Demo Insights ---
    demo_watched_count = demo_df["lead_id"].nunique()
    demo_attendance_rate = (demo_watched_count / awareness_leads) * 100 if awareness_leads > 0 else 0
    avg_watch_percentage = demo_df["watched_percentage"].mean()

    # --- Define Nodes ---
    dot.node("Lead", f"Lead\n({total_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Awareness", f"Awareness (Demo Video)\n({awareness_leads} leads)\nDemo Attendance: {demo_attendance_rate:.1f}%\nAvg Watch: {avg_watch_percentage:.1f}%", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Consideration", f"Consideration\n({consideration_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")
    dot.node("Conversion", f"Conversion\n({conversion_leads} leads)", shape="box", style="filled", fillcolor="deepskyblue")

    # --- Drop-Off Nodes ---
    dot.node("Drop_Demo", f"Dropped (No Demo)\n({drop_awareness} leads)\n{reasons_no_demo}", shape="box", style="filled", fillcolor="orangered")
    dot.node("Drop_Consideration", f"Dropped (No Consideration)\n({drop_consideration} leads)\n{reasons_no_consideration}", shape="box", style="filled", fillcolor="orangered")
    dot.node("Drop_Conversion", f"Dropped (No Conversion)\n({drop_conversion} leads)\n{reasons_no_conversion}", shape="box", style="filled", fillcolor="orangered")

    # --- Decision Points ---
    dot.node("Interest1", "Lead’s Interest", shape="diamond")
    dot.node("Interest2", "Lead’s Interest", shape="diamond")
    dot.node("Interest3", "Lead’s Interest", shape="diamond")

    # --- Define Edges ---
    dot.edge("Lead", "Interest1", "Introduction/Demo Call")
    dot.edge("Interest1", "Drop_Demo", f"No ({drop_awareness} dropped)")
    dot.edge("Interest1", "Awareness", "Yes")

    dot.edge("Awareness", "Interest2", "Post Demo Follow-up")
    dot.edge("Interest2", "Drop_Consideration", f"No ({drop_consideration} dropped)")
    dot.edge("Interest2", "Consideration", "Yes")

    dot.edge("Consideration", "Interest3", "Conversion Follow-up")
    dot.edge("Interest3", "Drop_Conversion", f"No ({drop_conversion} dropped)")
    dot.edge("Interest3", "Conversion", "Yes - Payment Successful")

    return dot

# Example usage
dynamic_graph = create_customer_acquisition_flow(leads_df, interactions_df, no_interest_df, demo_df)
dynamic_graph.render('C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/ca_flow_ver_N', format='png', view=True)


'C:\\Users\\029at\\Desktop\\Big Data\\Seminar\\Data Kaggle\\data\\ca_flow_ver_N.png'

# Test Graph 2

In [13]:
import pandas as pd
from graphviz import Digraph
from collections import defaultdict

def generate_dynamic_flow_chart(leads_df, demo_df, interactions_df, no_interest_df):
    # Calculate statistics from the data
    total_leads = len(leads_df)
    demo_watched = len(demo_df)
    converted = len(interactions_df[interactions_df['lead_stage'] == 'conversion'])
    
    # Calculate stage transitions
    stage_counts = interactions_df['lead_stage'].value_counts().to_dict()
    lead_to_awareness = len(interactions_df[interactions_df['lead_stage'] == 'awareness'])
    awareness_to_consideration = len(interactions_df[interactions_df['lead_stage'] == 'consideration'])
    consideration_to_conversion = converted
    
    # Calculate dropout rates
    dropout_after_lead = total_leads - lead_to_awareness
    dropout_after_awareness = lead_to_awareness - awareness_to_consideration
    dropout_after_consideration = awareness_to_consideration - consideration_to_conversion
    
    # Reasons for no interest - collect all reasons with counts
    no_interest_reasons = defaultdict(int)
    reason_categories = {}
    
    # Process each category column separately
    for col in no_interest_df.columns[1:]:
        category = col.replace('reasons_for_not_interested_in_', '')  
        category = category.replace('reasons_for_not_interested_to_', '')  
        category = category.replace('_', ' ').title()
        reason_counts = no_interest_df[col].value_counts().to_dict()
        reason_categories[category] = reason_counts
    
    # Create the flow chart
    flow = Digraph('Customer_Acquisition_Flow', filename='dynamic_customer_acquisition_flow.gv')
    flow.attr(rankdir='TB', size='16,12', compound='true')  # Increased size
    flow.attr('node', shape='box', style='rounded', fontname='Arial', fontsize='10')
    flow.attr('edge', fontname='Arial', fontsize='9')
    
    # Main stages with actual counts
    with flow.subgraph(name='cluster_main_stages') as c:
        c.attr(label='Customer Acquisition Key Stages (with actual counts)', style='filled', 
               color='lightgrey', fontsize='12')
        
        c.node('Lead', f'Lead\nTotal: {total_leads}\nSources: {leads_df["lead_gen_source"].nunique()}')
        c.node('Awareness', f'Awareness\nDemo Watched: {demo_watched}\n({demo_df["language"].value_counts().to_dict()})')
        c.node('Consideration', f'Consideration\nFollow-ups: {awareness_to_consideration}')
        c.node('Conversion', f'Conversion\nSuccessful: {converted}\nRate: {converted/total_leads:.1%}')
        
        # Connect main stages with transition counts
        c.edge('Lead', 'Awareness', label=f'{lead_to_awareness} ({lead_to_awareness/total_leads:.1%})')
        c.edge('Awareness', 'Consideration', label=f'{awareness_to_consideration} ({awareness_to_consideration/lead_to_awareness:.1%})')
        c.edge('Consideration', 'Conversion', label=f'{consideration_to_conversion} ({consideration_to_conversion/awareness_to_consideration:.1%})')
    
    # Dropout points with counts
    flow.node('Dropout1', f'Dropout\n{dropout_after_lead} leads\n({dropout_after_lead/total_leads:.1%})', 
              shape='diamond', color='red')
    flow.node('Dropout2', f'Dropout\n{dropout_after_awareness} leads\n({dropout_after_awareness/lead_to_awareness:.1%})', 
              shape='diamond', color='red')
    flow.node('Dropout3', f'Dropout\n{dropout_after_consideration} leads\n({dropout_after_consideration/awareness_to_consideration:.1%})', 
              shape='diamond', color='red')
    
    flow.edge('Lead', 'Dropout1', style='dashed')
    flow.edge('Awareness', 'Dropout2', style='dashed')
    flow.edge('Consideration', 'Dropout3', style='dashed')
    
    # Enhanced Reasons section - create a cluster for all reasons
    with flow.subgraph(name='cluster_reasons') as c:
        c.attr(label='Detailed Reasons for No Interest', style='filled', 
               color='lightyellow', fontsize='12', rank='same')
        
        # Create a table-like structure for each category
        for category, reasons in reason_categories.items():
            if not reasons:
                continue
                
            # Create header node for the category
            c.node(f'{category}_header', 
                  f"<<B>{category}</B>>", 
                  shape='plaintext', fontsize='11')
            
            # Create nodes for each reason in the category
            for reason, count in reasons.items():
                reason_text = f"{reason}: {count} leads"
                node_id = f"{category}_{reason}"
                c.node(node_id, reason_text, shape='box', style='filled', 
                      color='lightcoral', fontsize='9')
                
                # Connect header to reason
                c.edge(f'{category}_header', node_id, style='invis')
        
        # Arrange categories horizontally
        category_headers = [f'{cat}_header' for cat in reason_categories.keys()]
        if len(category_headers) > 1:
            c.attr(rank='same')
            for i in range(len(category_headers)-1):
                c.edge(category_headers[i], category_headers[i+1], style='invis')
    
    # Connect dropout points to reasons cluster
    # flow.edge('Dropout1', 'cluster_reasons', lhead='cluster_reasons', style='dotted')
    # flow.edge('Dropout2', 'cluster_reasons', lhead='cluster_reasons', style='dotted')
    # flow.edge('Dropout3', 'cluster_reasons', lhead='cluster_reasons', style='dotted')
    
    # Call interaction patterns (with error handling)
    with flow.subgraph(name='cluster_call_flow') as c:
        c.attr(label='Common Call Interaction Patterns', style='filled', color='lightblue', fontsize='12')
        
        # Get most common call reasons by stage
        call_reasons = interactions_df.groupby(['lead_stage', 'call_reason']).size().unstack().fillna(0)
        
        # Safely get call counts with default 0 if reason doesn't exist
        def get_call_count(stage, reason):
            try:
                return int(call_reasons.loc[stage, reason])
            except (KeyError, AttributeError):
                return 0
        
        intro_calls = get_call_count('lead', 'lead_introduction')
        demo_scheduled_calls = get_call_count('lead', 'demo_scheduled')
        followup_calls = get_call_count('awareness', 'after_demo_followup')
        converted_calls = get_call_count('conversion', 'successful_conversion')

        c.node('Start', 'Start Call')
        c.node('Intro', f"Introduction\n{intro_calls} calls")
        c.node('Demo', f"Demo Scheduled\n{demo_scheduled_calls} calls")
        c.node('Followup', f"Follow-up\n{followup_calls} calls")
        c.node('ConversionCall', f"Conversion Call\n{converted_calls} calls")
        
        c.edges([
            ('Start', 'Intro'),
            ('Intro', 'Demo'),
            ('Demo', 'Followup'),
            ('Followup', 'ConversionCall')
        ])
    
    # Connect the call flow to main stages
    flow.edge('Lead', 'Start', lhead='cluster_call_flow', constraint='false')
    flow.edge('ConversionCall', 'Conversion', ltail='cluster_call_flow', constraint='false')
    
    return flow

# Generate and display the flow chart
dynamic_flow = generate_dynamic_flow_chart(leads_df, demo_df, interactions_df, no_interest_df)
dynamic_flow.render('C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/deepseek/ca_flow_ver_N5', format='pdf', view=True)

'C:\\Users\\029at\\Desktop\\Big Data\\Seminar\\Data Kaggle\\deepseek\\ca_flow_ver_N5.pdf'

In [106]:
for col in no_interest_df.columns[1:]:
    category = col.replace('reasons_for_not_interested_in_', '')  
    category = category.replace('reasons_for_not_interested_to_', '')  
    category = category.replace('_', ' ').title()
    print(category)


Demo
Consider
Convert


# Sales Hierarchy Graph (Organizational Chart)

In [None]:
def create_filtered_sales_hierarchy(selected_snr_sm=None):
    hierarchy = Digraph("FilteredSalesHierarchy")
    hierarchy.attr(rankdir="TB")

    # If a specific Senior Sales Manager is selected, filter the data
    if selected_snr_sm:
        filtered_managers_df = managers_df[managers_df["snr_sm_id"] == selected_snr_sm]
    else:
        filtered_managers_df = managers_df  # Show all data if no filter

    # Extract filtered relationships
    senior_managers = filtered_managers_df["snr_sm_id"].unique()
    junior_to_leads = filtered_managers_df.groupby("jnr_sm_id")["lead_id"].apply(list).to_dict()
    senior_to_junior = filtered_managers_df.groupby("snr_sm_id")["jnr_sm_id"].unique().to_dict()

    # Add Senior Managers
    for sm in senior_managers:
        hierarchy.node(sm, shape="box", style="filled", fillcolor="purple", fontcolor="white")

    # Add Junior Managers and connect them
    for sm, juniors in senior_to_junior.items():
        for jm in juniors:
            hierarchy.node(jm, shape="box", style="filled", fillcolor="lightblue")
            hierarchy.edge(sm, jm)

    # Add Leads and connect them
    for jm, leads in junior_to_leads.items():
        for lead in leads:
            hierarchy.node(lead, shape="ellipse", style="filled", fillcolor="pink")
            hierarchy.edge(jm, lead)

    # Save and render
    filename = f"C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/sales_hierarchy_{selected_snr_sm if selected_snr_sm else 'all'}"
    # hierarchy.render(filename, format="png", view=False)
    hierarchy.engine = "sfdp"  # Alternative: "neato", "fdp", "twopi"
    hierarchy.render(filename, format="png", view=True)
    return filename

# Example: View only sales hierarchy for a specific Senior Sales Manager
create_filtered_sales_hierarchy("SNR501MG")


'C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/sales_hierarchy_SNR501MG'

In [15]:
import pandas as pd
from graphviz import Digraph

def create_filtered_sales_hierarchy(sales_data_path, selected_snr_sm=None, output_filename="sales_hierarchy"):
    # Load sales hierarchy data
    managers_df = pd.read_csv(sales_data_path)
    
    hierarchy = Digraph("FilteredSalesHierarchy")
    hierarchy.attr(rankdir="TB")
    
    # Filter for specific Senior Sales Manager if provided
    if selected_snr_sm:
        filtered_managers_df = managers_df[managers_df["snr_sm_id"] == selected_snr_sm]
    else:
        filtered_managers_df = managers_df  # Show all data if no filter
    
    # Extract filtered relationships
    senior_managers = filtered_managers_df["snr_sm_id"].unique()
    junior_to_leads = filtered_managers_df.groupby("jnr_sm_id")["lead_id"].apply(list).to_dict()
    senior_to_junior = filtered_managers_df.groupby("snr_sm_id")["jnr_sm_id"].unique().to_dict()
    
    # Add Senior Managers
    for sm in senior_managers:
        hierarchy.node(sm, shape="box", style="filled", fillcolor="purple", fontcolor="white")
    
    # Add Junior Managers and connect them
    for sm, juniors in senior_to_junior.items():
        for jm in juniors:
            hierarchy.node(jm, shape="box", style="filled", fillcolor="lightblue")
            hierarchy.edge(sm, jm)
    
    # Add Leads and connect them
    for jm, leads in junior_to_leads.items():
        for lead in leads:
            hierarchy.node(lead, shape="ellipse", style="filled", fillcolor="pink")
            hierarchy.edge(jm, lead)
    
    # Save and render
    output_path = f"{output_filename}_{selected_snr_sm if selected_snr_sm else 'all'}"
    hierarchy.engine = "sfdp"  # Alternative: "neato", "fdp", "twopi"
    hierarchy.render(output_path, format="png", view=True)
    print(f"Graph saved as {output_path}.png")
    
    return output_path

# Example: View only sales hierarchy for a specific Senior Sales Manager
create_filtered_sales_hierarchy("Data Kaggle\sales_managers_assigned_leads_details.csv", "SNR501MG", "C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/sales_hierarchy")


Graph saved as C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/sales_hierarchy_SNR501MG.png


'C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/sales_hierarchy_SNR501MG'

# Customer Acquisition Flow (Process Flowchart)

In [None]:
# leads_file = "Data Kaggle\leads_basic_details.csv"
# C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/

In [None]:
# def generate_acquisition_flowchart(lead_interaction_path, output_filename="customer_acquisition_flow"):
#     # Load lead interaction data
#     df = pd.read_csv(lead_interaction_path)
    
#     # Define flowchart
#     flowchart = Digraph("Customer Acquisition Flow")
#     flowchart.attr(rankdir="LR")  # Left to right layout
    
#     # Define stages
#     stages = ["Lead", "Awareness", "Consideration", "Conversion"]
    
#     # Count the number of leads in each stage
#     stage_counts = df.groupby("lead_stage")["lead_id"].nunique().to_dict()
    
#     for stage in stages:
#         count = stage_counts.get(stage.lower(), 0)  # Ensure correct case handling
#         flowchart.node(stage, label=f"{stage}\n({count} leads)", shape="box", style="filled", fillcolor="lightblue")
    
#     # Add transitions between stages
#     for i in range(len(stages) - 1):
#         flowchart.edge(stages[i], stages[i + 1], label="Progress")
    
#     # Handle Drop-off Points
#     total_leads = df["lead_id"].nunique()
#     converted_leads = stage_counts.get("conversion", 0)
#     drop_offs = total_leads - converted_leads
    
#     flowchart.node("Drop-off", label=f"Drop-off\n({drop_offs} leads)", shape="ellipse", style="filled", fillcolor="red")
#     flowchart.edge("Consideration", "Drop-off", label=f"{drop_offs} Drop-offs")
    
#     # Render flowchart
#     output_path = f"{output_filename}customer_acquisition_flow"
#     flowchart.render(output_path, format="png", view=True)
#     print(f"Graph saved as {output_path}")


In [None]:
# Example usage
# generate_acquisition_flowchart("Data Kaggle\leads_interaction_details.csv", "C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/")

Graph saved as C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/customer_acquisition_flow


In [16]:
import pandas as pd
from graphviz import Digraph

def generate_acquisition_flowchart(df, demo_df, no_interest_df, output_filename="customer_acquisition_flow"):
    
    # Define flowchart
    flowchart = Digraph("Customer Acquisition Flow")
    flowchart.attr(rankdir="LR")  # Left to right layout
    
    # Define stages
    stages = ["Lead", "Awareness", "Consideration", "Conversion"]
    
    # Count the number of leads in each stage
    stage_counts = df.groupby("lead_stage")["lead_id"].nunique().to_dict()
    total_leads = df["lead_id"].nunique()
    converted_leads = stage_counts.get("conversion", 0)
    drop_offs = total_leads - converted_leads
    
    # Calculate conversion rates
    conversion_rates = {}
    for i in range(len(stages) - 1):
        prev_stage = stages[i].lower()
        next_stage = stages[i + 1].lower()
        if prev_stage in stage_counts and next_stage in stage_counts:
            conversion_rates[prev_stage] = (stage_counts[next_stage] / stage_counts[prev_stage]) * 100 if stage_counts[prev_stage] > 0 else 0
    
    # Calculate call success rate per stage
    call_success_rates = df.groupby("lead_stage")["call_status"].apply(lambda x: (x == "successful").sum() / len(x) * 100 if len(x) > 0 else 0).to_dict()
    
    # Calculate average time spent per stage
    df["call_done_date"] = pd.to_datetime(df["call_done_date"], errors='coerce')
    avg_time_per_stage = df.groupby("lead_stage")["call_done_date"].apply(lambda x: x.diff().mean().days if x.diff().count() > 0 else 0).to_dict()
    
    # Calculate demo influence
    demo_watched_counts = demo_df["lead_id"].nunique()
    demo_conversion_rate = (df[df["lead_stage"] == "consideration"]["lead_id"].nunique() / demo_watched_counts) * 100 if demo_watched_counts > 0 else 0
    
    # Calculate drop-off reasons breakdown
    drop_off_reasons = no_interest_df.melt(id_vars=["lead_id"], var_name="reason_type", value_name="reason").dropna()
    reason_counts = drop_off_reasons["reason"].value_counts().to_dict()
    
    # Add stages with details
    for stage in stages:
        count = stage_counts.get(stage.lower(), 0)
        conversion_rate = conversion_rates.get(stage.lower(), 0)
        call_success = call_success_rates.get(stage.lower(), 0)
        avg_time = avg_time_per_stage.get(stage.lower(), 0)
        
        flowchart.node(stage, label=f"{stage}\n({count} leads)\nConv. Rate: {conversion_rate:.1f}%\nCall Success: {call_success:.1f}%\nAvg Time: {avg_time:.1f} days", shape="box", style="filled", fillcolor="lightblue")
    
    # Add transitions with conversion rates
    for i in range(len(stages) - 1):
        flowchart.edge(stages[i], stages[i + 1], label=f"{conversion_rates.get(stages[i].lower(), 0):.1f}% Conversion")
    
    # Handle Drop-off Points
    flowchart.node("Drop-off", label=f"Drop-off\n({drop_offs} leads)", shape="ellipse", style="filled", fillcolor="red")
    flowchart.edge("Consideration", "Drop-off", label=f"{drop_offs} Drop-offs")
    
    # Add drop-off reasons
    for reason, count in reason_counts.items():
        reason_node = f"Drop-off: {reason}"
        flowchart.node(reason_node, label=f"{reason}\n({count} leads)", shape="ellipse", style="filled", fillcolor="orange")
        flowchart.edge("Drop-off", reason_node)
    
    # Demo influence
    flowchart.node("Demo Influence", label=f"Demo Watched: {demo_watched_counts} leads\nConversion Rate: {demo_conversion_rate:.1f}%", shape="parallelogram", style="filled", fillcolor="yellow")
    flowchart.edge("Awareness", "Demo Influence")
    
    # Render flowchart
    output_path = f"{output_filename}customer_acquisition_flow"
    flowchart.render(output_path, format="png", view=True)
    print(f"Graph saved as {output_path}")


In [17]:
# Example usage
generate_acquisition_flowchart(interactions_df, demo_df, no_interest_df, "C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/")



Graph saved as C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/customer_acquisition_flow


# Call Interaction Success vs Failure (Decision Flow)

In [18]:
set(leads_df['lead_gen_source'])

{'SEO', 'email_marketing', 'social_media', 'user_referrals', 'website'}

In [19]:
import pandas as pd
from graphviz import Digraph

def generate_call_acquisition_flowchart(df, demo_df, no_interest_df, output_filename="customer_acquisition_flow"):
    # Define flowchart
    flowchart = Digraph("Customer Acquisition Flow", format="png")
    flowchart.attr(rankdir="LR")  # Left to right layout

    # Define stages
    stages = ["Lead", "Awareness (Demo Video)", "Consideration", "Conversion"]

    # Count the number of leads in each stage
    stage_counts = df.groupby("lead_stage")["lead_id"].nunique().to_dict()
    total_leads = df["lead_id"].nunique()
    converted_leads = stage_counts.get("conversion", 0)
    drop_offs = total_leads - converted_leads

    # Drop-off reasons
    drop_off_reasons = no_interest_df.melt(id_vars=["lead_id"], var_name="reason_type", value_name="reason").dropna()
    reason_counts = drop_off_reasons["reason"].value_counts().to_dict()

    # Conversion rates
    conversion_rates = {}
    for i in range(len(stages) - 1):
        prev_stage = stages[i].split()[0].lower()
        next_stage = stages[i + 1].split()[0].lower()
        if prev_stage in stage_counts and next_stage in stage_counts:
            conversion_rates[prev_stage] = (stage_counts[next_stage] / stage_counts[prev_stage]) * 100 if stage_counts[prev_stage] > 0 else 0

    # Call success rates per stage
    call_success_rates = df.groupby("lead_stage")["call_status"].apply(lambda x: (x == "successful").sum() / len(x) * 100 if len(x) > 0 else 0).to_dict()

    # Add stages with details
    for stage in stages:
        count = stage_counts.get(stage.split()[0].lower(), 0)
        conversion_rate = conversion_rates.get(stage.split()[0].lower(), 0)
        call_success = call_success_rates.get(stage.split()[0].lower(), 0)
        
        flowchart.node(stage, label=f"{stage}\n({count} leads)\nConv. Rate: {conversion_rate:.1f}%\nCall Success: {call_success:.1f}%", shape="box", style="filled", fillcolor="lightblue")

    # Define transitions with Yes/No decisions
    flowchart.edge("Lead", "Awareness (Demo Video)", label="Yes: Interested in Demo")
    flowchart.node("Drop 1", label="Drop (No Interest / No Response)", shape="ellipse", fillcolor="red")
    flowchart.edge("Lead", "Drop 1", label="No")

    flowchart.edge("Awareness (Demo Video)", "Consideration", label="Yes: Post-Demo Follow-up")
    flowchart.node("Drop 2", label="Drop (Did Not Attend / No Follow-up Interest)", shape="ellipse", fillcolor="red")
    flowchart.edge("Awareness (Demo Video)", "Drop 2", label="No")

    flowchart.edge("Consideration", "Conversion", label="Yes: Interested in Buying")
    flowchart.node("Drop 3", label="Drop (Budget / Product Fit)", shape="ellipse", fillcolor="red")
    flowchart.edge("Consideration", "Drop 3", label="No")

    flowchart.node("Drop 4", label="Drop (Payment Failed / Last-Minute Change)", shape="ellipse", fillcolor="red")
    flowchart.edge("Conversion", "Drop 4", label="Failed Payment")

    # Add drop-off reasons
    for reason, count in reason_counts.items():
        reason_node = f"Drop-off: {reason}"
        flowchart.node(reason_node, label=f"{reason}\n({count} leads)", shape="ellipse", style="filled", fillcolor="orange")
        flowchart.edge("Drop 1", reason_node)  # Connect to appropriate stage if needed

    # Add demo influence
    demo_watched_counts = demo_df["lead_id"].nunique()
    demo_conversion_rate = (df[df["lead_stage"] == "consideration"]["lead_id"].nunique() / demo_watched_counts) * 100 if demo_watched_counts > 0 else 0
    flowchart.node("Demo Influence", label=f"Demo Watched: {demo_watched_counts} leads\nConversion Rate: {demo_conversion_rate:.1f}%", shape="parallelogram", style="filled", fillcolor="yellow")
    flowchart.edge("Awareness (Demo Video)", "Demo Influence")

    # Render flowchart
    output_path = f"{output_filename}_call"
    flowchart.render(output_path, format="png", view=True)
    print(f"Graph saved as {output_path}")




In [20]:

# Example usage:
# Call the function (assuming df, demo_df, and no_interest_df are loaded)
generate_acquisition_flowchart(interactions_df, demo_df, no_interest_df, "C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/")




Graph saved as C:/Users/029at/Desktop/Big Data/Seminar/Data Kaggle/data/customer_acquisition_flow


# Demo Engagement Graph

# Drop-off Reasons Graph