1)users  Behavior Insights

a) Analyze trends in service frequency    
b)Identification of Most Missed Services   
c)Lost and Loyal Customers Data   

2) Parts Consumption Pattern   
a) Most usage parts          
b) most avoid parts         
c) Parts Discount recommendation          

3) Dealer Wise Customers Trend          
a) Dealer wise Analysis of Dealer Wise Service Reduction Trends         
b)Lost Customers by Dealer

Analyze trends in service frequency (1(a))

In [26]:
import pandas as pd
import gradio as gr
import plotly.express as px
import numpy as np

# Load the dataset
df = pd.read_csv('E:\\ArcGIS\\ACI Center\\Data analysis\\preprosess_df.csv')

# Ensure Invoice_Date is in datetime format
df['Invoice_Date'] = pd.to_datetime(df['Invoice_Date'])

# Correctly define 7-day rolling periods (fixed intervals starting from min date)
df['7-Day Interval'] = df['Invoice_Date'].dt.to_period('D').apply(lambda x: x.start_time).dt.floor('7D')

# Get unique dealer codes and service types
unique_dealer_codes = sorted(df["Dealer_Code"].astype(str).unique().tolist())
unique_services = sorted(df["Job_Type_Details"].dropna().unique().tolist())

# Function to get the list of services provided by a dealer
def get_services_by_dealer(dealer_code):
    dealer_data = df[df['Dealer_Code'] == dealer_code]
    if dealer_data.empty:
        return f"No data found for Dealer Code: {dealer_code}", []
    service_types = sorted(dealer_data['Job_Type_Details'].dropna().unique().tolist())
    return pd.DataFrame(service_types, columns=['Service Types'])

# Function to plot 7-day interval service trends with Polynomial Regression
def plot_service_trend(dealer_code, service_name):
    dealer_data = df[df['Dealer_Code'] == dealer_code]
    if dealer_data.empty:
        return f"No data found for Dealer Code: {dealer_code}"
    
    # Filter for the selected service
    service_data = dealer_data[dealer_data['Job_Type_Details'].str.lower() == service_name.lower()]
    if service_data.empty:
        return f"No data found for Service: {service_name} under Dealer: {dealer_code}"
    
    # Group by exact 7-day intervals
    service_trend = service_data.groupby('7-Day Interval')['Job_Type_Details'].count().reset_index()
    service_trend.columns = ['7-Day Interval', 'Service Count']

    # Sort by date
    service_trend['7-Day Interval'] = pd.to_datetime(service_trend['7-Day Interval'])
    service_trend = service_trend.sort_values(by='7-Day Interval')

    # Polynomial Regression (2nd Degree)
    x = np.arange(len(service_trend))  # Convert date index to numbers
    y = service_trend['Service Count'].values
    
    if len(x) > 2:
        poly_coeffs = np.polyfit(x, y, 2)
        poly_trend = np.poly1d(poly_coeffs)
        service_trend['Trend Line'] = poly_trend(x)
    else:
        service_trend['Trend Line'] = y  

    # Plot using Plotly
    fig = px.bar(
        service_trend,
        x='7-Day Interval',
        y='Service Count',
        title=f'Trend of "{service_name}" Service (7-Day Interval) - Dealer: {dealer_code}',
        labels={'Service Count': 'Total Services'},
        text='Service Count',
        color_discrete_sequence=['blue'],
        width=1000,
        height=500
    )
    
    # Add Polynomial Regression Trend Line
    fig.add_scatter(
        x=service_trend['7-Day Interval'],
        y=service_trend['Trend Line'],
        mode='lines',
        name='Trend Line (Polynomial Regression)',
        line=dict(color='red', width=3, dash='dash')
    )
    
    fig.update_layout(
        xaxis_title='Date',
        yaxis_title='Service Count',
        title_x=0.5,
        font=dict(size=14)
    )
    
    return fig

# Gradio Interface
with gr.Blocks() as interface:
    gr.Markdown("# Dealer Service Trend Analysis")

    # Input for Dealer Code (Dropdown + Typing Support)
    with gr.Row():
        dealer_code_input = gr.Dropdown(
            label="Select or Type Dealer Code", 
            choices=unique_dealer_codes, 
            interactive=True
        )
        fetch_services_button = gr.Button("Fetch Services")

    # Output: Table of available services
    service_table_output = gr.DataFrame(label="Available Services", interactive=False)

    # Input for Service Name (Dropdown + Typing Support)
    service_name_input = gr.Dropdown(
        label="Search for a Specific Service Type", 
        choices=unique_services, 
        interactive=True
    )

    # Graph positioned directly below the search box
    service_graph_output = gr.Plot(label="7-Day Interval Service Trend")

    # Fetch services when the button is clicked
    fetch_services_button.click(
        fn=get_services_by_dealer,
        inputs=dealer_code_input,
        outputs=service_table_output
    )

    # Generate the trend graph when a service name is entered
    service_name_input.change(
        fn=plot_service_trend,
        inputs=[dealer_code_input, service_name_input],
        outputs=service_graph_output
    )

# Launch the interface
interface.launch()


* Running on local URL:  http://127.0.0.1:7884

To create a public link, set `share=True` in `launch()`.




Lost and Loyal Customer Data (1(c))

In [27]:
import pandas as pd
import gradio as gr
import plotly.express as px
from datetime import datetime

# Load the dataset
df = pd.read_csv('E:\\ArcGIS\\ACI Center\\Data analysis\\preprosess_df.csv')

# Ensure Invoice_Date is in datetime format
df['Invoice_Date'] = pd.to_datetime(df['Invoice_Date'])

# Get unique dealer codes for dropdown options
unique_dealer_codes = sorted(df["Dealer_Code"].astype(str).unique().tolist())

# Function to rank loyal customers under a given dealer code
def rank_loyal_customers(dealer_code, top_loyal, top_disloyal):
    # Filter the data for the given dealer
    dealer_data = df[df['Dealer_Code'] == dealer_code]

    if dealer_data.empty:
        return f"No data found for Dealer Code: {dealer_code}", pd.DataFrame(), pd.DataFrame()

    # Group by Customer Name
    customer_stats = dealer_data.groupby('Customer_Name').agg(
        First_Service_Date=('Invoice_Date', 'min'),
        Last_Service_Date=('Invoice_Date', 'max'),
        Total_Services=('Invoice_Date', 'count'),
        Unique_Service_Types=('Job_Type_Details', 'nunique')
    ).reset_index()

    # Calculate Range of Service Taking (in days)
    customer_stats['Service_Range_Days'] = (customer_stats['Last_Service_Date'] - customer_stats['First_Service_Date']).dt.days

    # Normalize the values for fair comparison
    max_range = customer_stats['Service_Range_Days'].max()
    max_services = customer_stats['Total_Services'].max()
    max_unique_types = customer_stats['Unique_Service_Types'].max()

    customer_stats['Norm_Range'] = customer_stats['Service_Range_Days'] / max_range if max_range else 0
    customer_stats['Norm_Services'] = customer_stats['Total_Services'] / max_services if max_services else 0
    customer_stats['Norm_Unique_Types'] = customer_stats['Unique_Service_Types'] / max_unique_types if max_unique_types else 0

    # Calculate Weighted Loyalty Score
    customer_stats['Loyalty_Score'] = (
        (customer_stats['Norm_Range'] * 0.33333) +
        (customer_stats['Norm_Services'] * 0.33333) +
        (customer_stats['Norm_Unique_Types'] * 0.33333)
    )

    # Rank customers from most loyal to least loyal
    customer_stats = customer_stats.sort_values(by='Loyalty_Score', ascending=False).reset_index(drop=True)

    # Separate Loyal and Disloyal Customers
    loyal_customers = customer_stats.head(int(top_loyal))
    disloyal_customers = customer_stats.tail(int(top_disloyal))

    return loyal_customers, disloyal_customers

# Function to plot loyal customers
def plot_loyal_customers(dealer_code, top_loyal, top_disloyal):
    loyal_customers, _ = rank_loyal_customers(dealer_code, top_loyal, top_disloyal)
    
    if loyal_customers.empty:
        return f"No loyal customer data found for Dealer Code: {dealer_code}"

    # Plot using Plotly (Color Gradient Light Yellow → Dark Red)
    fig = px.bar(
        loyal_customers,
        x='Customer_Name',
        y='Loyalty_Score',
        title=f'Top {top_loyal} Loyal Customers for Dealer {dealer_code}',
        labels={'Loyalty_Score': 'Loyalty Score'},
        text='Loyalty_Score',
        color='Loyalty_Score',
        color_continuous_scale='YlOrRd'  # Light Yellow → Dark Red Gradient
    )
    fig.update_traces(textposition='outside')
    fig.update_layout(xaxis_title='Customer Name', yaxis_title='Loyalty Score', title_x=0.5)

    return fig

# Function to plot disloyal customers
def plot_disloyal_customers(dealer_code, top_loyal, top_disloyal):
    _, disloyal_customers = rank_loyal_customers(dealer_code, top_loyal, top_disloyal)
    
    if disloyal_customers.empty:
        return f"No disloyal customer data found for Dealer Code: {dealer_code}"

    # Plot using Plotly (Color Gradient Light Yellow → Dark Red)
    fig = px.bar(
        disloyal_customers,
        x='Customer_Name',
        y='Loyalty_Score',
        title=f'Top {top_disloyal} Disloyal Customers for Dealer {dealer_code}',
        labels={'Loyalty_Score': 'Loyalty Score'},
        text='Loyalty_Score',
        color='Loyalty_Score',
        color_continuous_scale='YlOrRd'  # Light Yellow → Dark Red Gradient
    )
    fig.update_traces(textposition='outside')
    fig.update_layout(xaxis_title='Customer Name', yaxis_title='Loyalty Score', title_x=0.5)

    return fig

# Gradio Interface
with gr.Blocks() as interface:
    gr.Markdown("# Loyal & Disloyal Customer Ranking System")

    # Input for Dealer Code (Dropdown + Typing Support)
    with gr.Row():
        dealer_code_input = gr.Dropdown(
            label="Select or Type Dealer Code", 
            choices=unique_dealer_codes, 
            interactive=True
        )
    
    # Inputs for Number of Loyal & Disloyal Customers (Typing Only)
    with gr.Row():
        top_loyal_input = gr.Number(label="Number of Loyal Customers", value=10, precision=0)
        top_disloyal_input = gr.Number(label="Number of Disloyal Customers", value=10, precision=0)
        generate_button = gr.Button("Generate Data")

    # Output: Ranked Table of Loyal Customers
    loyal_table_output = gr.DataFrame(label="Loyal Customers", interactive=False)
    
    # Output: Ranked Table of Disloyal Customers
    disloyal_table_output = gr.DataFrame(label="Disloyal Customers", interactive=False)

    # Output: Graph for Loyal Customers
    loyal_graph_output = gr.Plot(label="Top Loyal Customers")

    # Output: Graph for Disloyal Customers
    disloyal_graph_output = gr.Plot(label="Top Disloyal Customers")

    # Generate ranking when button is clicked
    generate_button.click(
        fn=rank_loyal_customers,
        inputs=[dealer_code_input, top_loyal_input, top_disloyal_input],
        outputs=[loyal_table_output, disloyal_table_output]
    )

    # Generate loyal customer graph when button is clicked
    generate_button.click(
        fn=plot_loyal_customers,
        inputs=[dealer_code_input, top_loyal_input, top_disloyal_input],
        outputs=loyal_graph_output
    )

    # Generate disloyal customer graph when button is clicked
    generate_button.click(
        fn=plot_disloyal_customers,
        inputs=[dealer_code_input, top_loyal_input, top_disloyal_input],
        outputs=disloyal_graph_output
    )

# Launch the interface
interface.launch()




* Running on local URL:  http://127.0.0.1:7885

To create a public link, set `share=True` in `launch()`.




Most usage parts (2(a))

In [None]:
import pandas as pd
import gradio as gr
import plotly.express as px
from matplotlib import pyplot as plt
import seaborn as sns

df = pd.read_csv('E:\\ArcGIS\\ACI Center\\Data analysis\\preprosess_df.csv')

# Data preprocessing
df["Invoice_Date"] = pd.to_datetime(df["Invoice_Date"])
df["Month"] = df["Invoice_Date"].dt.strftime("%B")
month_order = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]
df["Month"] = pd.Categorical(df["Month"], categories=month_order, ordered=True)
df["Product_Code"] = df["Product_Code"].astype(str).str.strip()

# Get unique product codes
unique_product_codes = sorted(df["Product_Code"].unique().tolist())


def get_top_products_with_percentage(n):
    if not n or n < 1:
        n = 10  # Default value if invalid input
    total_quantity = df["Quantity"].sum()
    product_sales = df.groupby("Product_Code")["Quantity"].sum().reset_index()
    product_sales["Percentage"] = (
        product_sales["Quantity"] / total_quantity * 100
    ).round(2)
    product_sales = product_sales.sort_values(by="Quantity", ascending=False).head(
        int(n)
    )
    return product_sales


# Function to plot monthly sales using Seaborn
def plot_monthly_sales(product_code):
    if not product_code:
        return gr.Plot.update(visible=False)

    # Filter data for the selected product
    product_data = df[df["Product_Code"] == product_code].copy()

    if product_data.empty:
        return gr.Plot.update(visible=False)

    # Calculate monthly sales
    monthly_sales = (
        product_data.groupby("Month", observed=True)["Quantity"].sum().reset_index()
    )
    monthly_sales = monthly_sales.sort_values("Month").copy()

    # Create the plot
    plt.figure(figsize=(10, 5))
    sns.barplot(data=monthly_sales, x="Month", y="Quantity", palette="viridis")

    # Add labels
    plt.xlabel("Month")
    plt.ylabel("Quantity Sold")
    plt.title(f"Monthly Sales for Product {product_code}")

    # Display value labels on top of bars
    for index, row in enumerate(monthly_sales.itertuples()):
        plt.text(index, row.Quantity, str(row.Quantity), ha="center", va="bottom")

    plt.xticks(rotation=45)  # Rotate x-axis labels for readability
    plt.tight_layout()

    return plt.gcf()  # Return the figure for rendering in Gradio


# Gradio Interface
with gr.Blocks() as interface:
    gr.Markdown("# Top Products and Monthly Sales Analysis")

    with gr.Row():
        top_n = gr.Number(
            label="Number of Top Products to Show",
            value=10,
            minimum=1,
            maximum=100,
            step=1,
        )
        search_button = gr.Button("Show Top Products")

    with gr.Row():
        product_table = gr.DataFrame(
            label="Top Products by Sales Volume",
            headers=["Product_Code", "Quantity", "Percentage"],
            interactive=False,
        )

    with gr.Row():
        product_dropdown = gr.Dropdown(
            label="Select Product Code", choices=unique_product_codes, interactive=True
        )

    product_graph = gr.Plot(label="Monthly Sales Graph")

    # Event handlers
    search_button.click(
        fn=get_top_products_with_percentage, inputs=top_n, outputs=product_table
    )

    product_dropdown.change(
        fn=plot_monthly_sales, inputs=product_dropdown, outputs=product_graph
    )

# Launch the interface
interface.launch()

* Running on local URL:  http://127.0.0.1:7886

To create a public link, set `share=True` in `launch()`.







Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.





Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.




most avoid parts (2(b))

In [29]:
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
df = pd.read_csv('E:\\ArcGIS\\ACI Center\\Data analysis\\preprosess_df.csv')

# Ensure Invoice_Date is in datetime format and extract month
df["Invoice_Date"] = pd.to_datetime(df["Invoice_Date"])
df["Month"] = df["Invoice_Date"].dt.strftime("%B")

# Sort months in calendar order
month_order = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]
df["Month"] = pd.Categorical(df["Month"], categories=month_order, ordered=True)

# Ensure Product_Code is string for consistency
df["Product_Code"] = df["Product_Code"].astype(str).str.strip()

# Get unique product codes for dropdown suggestions
unique_product_codes = df["Product_Code"].unique().tolist()


# Function to get least sold products with percentages
def get_least_products_with_percentage(n):
    total_quantity = df["Quantity"].sum()
    product_sales = df.groupby("Product_Code")["Quantity"].sum().reset_index()
    product_sales["Percentage"] = (product_sales["Quantity"] / total_quantity) * 100
    product_sales = product_sales.sort_values(by="Quantity", ascending=True).head(
        n
    )  # Least sold products
    product_sales.reset_index(drop=True, inplace=True)
    return product_sales


# Function to plot month-wise sales using Seaborn
def plot_monthly_sales(product_code):
    if not product_code:
        return "Please select a product."

    # Filter data for the selected product
    product_data = df[df["Product_Code"] == product_code]

    if product_data.empty:
        return f"No data found for product: {product_code}"

    # Aggregate sales by month
    monthly_sales = product_data.groupby("Month")["Quantity"].sum().reset_index()

    # Ensure correct order of months
    monthly_sales["Month"] = pd.Categorical(
        monthly_sales["Month"], categories=month_order, ordered=True
    )
    monthly_sales = monthly_sales.sort_values("Month")

    # Create the Seaborn plot
    plt.figure(figsize=(10, 5))
    sns.barplot(data=monthly_sales, x="Month", y="Quantity", palette="viridis")

    # Add labels
    plt.xlabel("Month")
    plt.ylabel("Quantity Sold")
    plt.title(f"Monthly Sales for Product {product_code}")

    # Display values on bars
    for index, row in enumerate(monthly_sales.itertuples()):
        plt.text(index, row.Quantity, str(row.Quantity), ha="center", va="bottom")

    plt.xticks(rotation=45)  # Rotate x-axis labels for readability
    plt.tight_layout()

    return plt.gcf()  # Return figure for Gradio


# Gradio Interface
with gr.Blocks() as interface:
    gr.Markdown("# Least Sold Products and Monthly Sales Analysis")

    # Input for number of least sold products and search button
    with gr.Row():
        top_n = gr.Number(
            label="Enter Number of Least Sold Products", value=10, precision=0
        )
        search_button = gr.Button("Search")

    # Table for least sold products
    with gr.Row():
        product_table = gr.DataFrame(
            label="Least Sold Products with Quantity and Percentage", interactive=False
        )

    # Search bar with auto-suggestions for product name
    with gr.Row():
        product_name = gr.Dropdown(
            label="Search for Product Code",
            choices=unique_product_codes,
            interactive=True,
        )
        update_button = gr.Button("Show Monthly Sales")

    product_graph = gr.Plot(label="Monthly Sales Graph")

    # Fetch least sold products and update the product table
    search_button.click(
        fn=lambda n: get_least_products_with_percentage(n),
        inputs=top_n,
        outputs=product_table,
    )

    # Show monthly sales graph for selected product
    update_button.click(
        fn=plot_monthly_sales, inputs=product_name, outputs=product_graph
    )

# Launch the interface
interface.launch()

* Running on local URL:  http://127.0.0.1:7887

To create a public link, set `share=True` in `launch()`.









Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.




Dealer-wise Analysis of Dealer Wise Service Reduction Trends (3(a))

In [None]:
import pandas as pd
import gradio as gr
import plotly.express as px

# Load the dataset
df = pd.read_csv('E:\\ArcGIS\\ACI Center\\Data analysis\\preprosess_df.csv')

# Ensure Invoice_Date is in datetime format and extract month
df['Invoice_Date'] = pd.to_datetime(df['Invoice_Date'])
df['Month'] = df['Invoice_Date'].dt.month

# Correct order for months
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
interval_order = ['Jan-Feb', 'Mar-Apr', 'May-Jun', 'Jul-Aug', 'Sep-Oct', 'Nov-Dec']

# Create a mapping for month names
month_names = {
    1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr',
    5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug',
    9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'
}

# Function to create 2-month intervals
def get_2_month_interval(month):
    start_month = month_names[(month - 1) // 2 * 2 + 1]
    end_month = month_names[(month - 1) // 2 * 2 + 2]
    return f"{start_month}-{end_month}"

# Get unique dealer codes and service types for dropdowns
unique_dealer_codes = df["Dealer_Code"].astype(str).unique().tolist()
unique_service_types = df["Job_Type_Details"].dropna().astype(str).unique().tolist()

# Function to fetch tables for a specific dealer
def fetch_dealer_data(dealer_code):
    dealer_data = df[df['Dealer_Code'] == dealer_code]
    
    if dealer_data.empty:
        return f"No data found for Dealer Code: {dealer_code}", pd.DataFrame(columns=['Job_Type_Details', 'Count'])
    
    service_ranking = dealer_data[dealer_data['Job_Type_Details'] != "not_applicable"]['Job_Type_Details'] \
        .value_counts() \
        .reset_index()
    service_ranking.columns = ['Service Types', 'Count']
    service_ranking = service_ranking.sort_values(by='Count', ascending=False)
    
    return service_ranking

# Function to plot 2-month frequency of a specific service type
def plot_service_frequency(dealer_code, service_type):
    service_type = service_type.strip().lower()
    
    filtered_data = df[
        (df['Dealer_Code'] == dealer_code) & 
        (df['Job_Type_Details'].str.lower().str.strip() == service_type)
    ]
    
    period_counts = pd.DataFrame({'2-Month Period': interval_order, 'Count': 0})

    if not filtered_data.empty:
        filtered_data['2-Month Period'] = filtered_data['Month'].apply(get_2_month_interval)
        actual_counts = filtered_data['2-Month Period'].value_counts().reset_index()
        actual_counts.columns = ['2-Month Period', 'Count']
        
        period_counts = period_counts.merge(actual_counts, on='2-Month Period', how='left').fillna(0)
        period_counts['Count'] = period_counts['Count_y'].astype(int)
        period_counts = period_counts[['2-Month Period', 'Count']]

    period_counts['2-Month Period'] = pd.Categorical(period_counts['2-Month Period'], categories=interval_order, ordered=True)

    fig = px.bar(
        period_counts,
        x='2-Month Period',
        y='Count',
        title=f'2-Month Frequency of "{service_type}" for Dealer: {dealer_code}',
        labels={'Count': 'Number of Services'},
        text='Count'
    )
    fig.update_traces(textposition='outside')
    fig.update_layout(xaxis_title='2-Month Period', yaxis_title='Frequency', title_x=0.5)
    return fig

# Function to plot service types by month (Fixed Order Issue)
def plot_service_type_by_month(dealer_code):
    dealer_data = df[df['Dealer_Code'] == dealer_code]

    month_df = pd.DataFrame({'Month_Name': month_order})

    if not dealer_data.empty:
        dealer_data['Month_Name'] = dealer_data['Month'].map(month_names)
        actual_counts = dealer_data.groupby(['Month_Name', 'Job_Type_Details']).size().reset_index(name='Count')
        actual_counts = actual_counts[actual_counts['Job_Type_Details'] != "not_applicable"]
        
        service_month_counts = month_df.merge(actual_counts, on='Month_Name', how='left').fillna(0)
        service_month_counts['Count'] = service_month_counts['Count'].astype(int)
    else:
        service_month_counts = month_df.copy()
        service_month_counts['Count'] = 0
        service_month_counts['Job_Type_Details'] = 'No Data'

    service_month_counts['Month_Name'] = pd.Categorical(service_month_counts['Month_Name'], categories=month_order, ordered=True)

    fig = px.bar(
        service_month_counts,
        x='Month_Name',
        y='Count',
        color='Job_Type_Details',
        barmode='stack',
        text='Count',
        title=f'Service Type Frequency by Month for Dealer: {dealer_code}',
        labels={'Month_Name': 'Month', 'Count': 'Number of Services', 'Job_Type_Details': 'Service Type'}
    )
    fig.update_traces(textposition='outside')
    fig.update_layout(xaxis_title='Month', yaxis_title='Frequency', title_x=0.5)
    return fig

# Gradio Interface
with gr.Blocks() as interface:
    gr.Markdown("# Dealer Service Analysis")
    
    with gr.Row():
        dealer_code = gr.Dropdown(
            label="Search for Dealer Code", 
            choices=unique_dealer_codes,  
            interactive=True  
        )
        fetch_button = gr.Button("Fetch Data")
    
    with gr.Row():
        service_table = gr.DataFrame(label="Service Type Ranking", interactive=False)
    
    with gr.Row():
        service_type = gr.Dropdown(
            label="Search for a Specific Service Type", 
            choices=unique_service_types,  
            interactive=True  
        )
        service_graph = gr.Plot(label="2-Month Frequency Graph")
    
    with gr.Row():
        monthly_service_graph = gr.Plot(label="Service Types by Month")
    
    fetch_button.click(
        fn=fetch_dealer_data,
        inputs=dealer_code,
        outputs=service_table
    )
    
    service_type.change(
        fn=plot_service_frequency,
        inputs=[dealer_code, service_type],
        outputs=service_graph
    )
    
    fetch_button.click(
        fn=plot_service_type_by_month,
        inputs=dealer_code,
        outputs=monthly_service_graph
    )

interface.launch()



* Running on local URL:  http://127.0.0.1:7888

To create a public link, set `share=True` in `launch()`.






A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Lost Customers by Dealer (3(b))

In [31]:
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Load the dataset
df = pd.read_csv('E:\\ArcGIS\\ACI Center\\Data analysis\\preprosess_df.csv')

# Ensure Invoice_Date is in datetime format
df["Invoice_Date"] = pd.to_datetime(df["Invoice_Date"])
df["Month"] = df["Invoice_Date"].dt.month

# Replace blank service names with "Blank"
df["Job_Type_Details"] = df["Job_Type_Details"].fillna("Blank")

# Today's date for calculation
TODAY_DATE = datetime(2025, 1, 10)

# Mapping for 3-month intervals
interval_mapping = {
    1: "Jan-Feb-Mar",
    2: "Jan-Feb-Mar",
    3: "Jan-Feb-Mar",
    4: "Apr-May-Jun",
    5: "Apr-May-Jun",
    6: "Apr-May-Jun",
    7: "Jul-Aug-Sep",
    8: "Jul-Aug-Sep",
    9: "Jul-Aug-Sep",
    10: "Oct-Nov-Dec",
    11: "Oct-Nov-Dec",
    12: "Oct-Nov-Dec",
}

# Define interval order
interval_order = ["Jan-Feb-Mar", "Apr-May-Jun", "Jul-Aug-Sep", "Oct-Nov-Dec"]

# Get unique dealer codes for dropdown suggestions
unique_dealer_codes = df["Dealer_Code"].astype(str).unique().tolist()


# Function to generate the bar chart and service table
def generate_data(dealer_code):
    if not dealer_code:
        return "Please select a dealer.", None

    # Filter data for the dealer code
    dealer_data = df[df["Dealer_Code"] == dealer_code]

    if dealer_data.empty:
        return "No data found for the selected dealer.", None

    # Process data for the bar chart
    dealer_data["3-Month Interval"] = dealer_data["Month"].map(interval_mapping)

    plot_data = []
    registered_customers = set()
    previous_customers = set()

    for interval in interval_order:
        current_customers = set(
            dealer_data[dealer_data["3-Month Interval"] == interval]["Customer_Name"]
        )

        new_customers = current_customers - registered_customers
        old_customers = current_customers & registered_customers
        lost_customers = previous_customers - current_customers

        plot_data.append(
            {
                "Interval": interval,
                "Customer Type": "New Customers",
                "Count": len(new_customers),
            }
        )
        plot_data.append(
            {
                "Interval": interval,
                "Customer Type": "Old Customers",
                "Count": len(old_customers),
            }
        )
        plot_data.append(
            {
                "Interval": interval,
                "Customer Type": "Lost Customers",
                "Count": len(lost_customers),
            }
        )

        registered_customers.update(current_customers)
        previous_customers = current_customers

    plot_df = pd.DataFrame(plot_data)

    # Create the Seaborn bar chart
    plt.figure(figsize=(10, 5))
    ax = sns.barplot(
        data=plot_df,
        x="Interval",
        y="Count",
        hue="Customer Type",
        palette={
            "New Customers": "green",
            "Old Customers": "yellow",
            "Lost Customers": "red",
        },
    )

    # Display count values on bars
    for p in ax.patches:
        if p.get_height() > 0:
            ax.annotate(
                f"{int(p.get_height())}",
                (p.get_x() + p.get_width() / 2.0, p.get_height()),
                ha="center",
                va="bottom",
                fontsize=10,
                color="black",
                weight="bold",
            )

    plt.xlabel("3-Month Interval")
    plt.ylabel("Customer Count")
    plt.title(f"Customer Analysis for Dealer: {dealer_code}")
    plt.xticks(rotation=45)
    plt.legend(title="Customer Type")
    plt.tight_layout()

    # Process service table
    service_data = dealer_data[dealer_data["Job_Type_Details"] != "not_applicable"]
    grouped = service_data.groupby("Customer_Name")

    results = []
    for customer, group in grouped:
        service_counts = group["Job_Type_Details"].value_counts().reset_index()
        service_counts.columns = ["Service", "Count"]

        # Create service list
        service_list_table = (
            "<table style='border-collapse: collapse; width: 100%;'>"
            "<tr><th>Service</th><th>Count</th></tr>"
        )
        for _, row in service_counts.iterrows():
            service_list_table += (
                f"<tr><td>{row['Service']}</td><td>{row['Count']}</td></tr>"
            )
        service_list_table += "</table>"

        # Get last service date
        last_service_date = group["Invoice_Date"].max()
        days_since_last_service = (TODAY_DATE - last_service_date).days

        results.append(
            {
                "Customer Name": customer,
                "Service List": service_list_table,
                "Last Service Date": last_service_date.date(),
                "Days Since Last Service": days_since_last_service,
            }
        )

    results_df = pd.DataFrame(results).sort_values(
        by="Days Since Last Service", ascending=False
    )

    # Render the service table as HTML
    html_table = (
        "<table style='border-collapse: collapse; width: 100%;'>"
        "<tr><th>Customer Name</th><th>Service List</th><th>Last Service Date</th><th>Days Since Last Service</th></tr>"
    )
    for _, row in results_df.iterrows():
        html_table += (
            f"<tr><td>{row['Customer Name']}</td>"
            f"<td>{row['Service List']}</td>"
            f"<td>{row['Last Service Date']}</td>"
            f"<td>{row['Days Since Last Service']}</td></tr>"
        )
    html_table += "</table>"

    return plt.gcf(), html_table


# Gradio Interface
with gr.Blocks() as interface:
    gr.Markdown("# Dealer Customer Analysis")

    # Input for dealer code (Dropdown with search)
    dealer_code = gr.Dropdown(
        label="Search for Dealer Code", choices=unique_dealer_codes, interactive=True
    )

    # Button for generating data
    generate_button = gr.Button("Generate Report")

    # Outputs (Plot comes first, then Table)
    customer_chart_output = gr.Plot(label="Customer Interval Bar Chart")
    service_table_output = gr.HTML(label="Customer Service Table")

    # Click event for generating both outputs
    generate_button.click(
        fn=generate_data,
        inputs=dealer_code,
        outputs=[customer_chart_output, service_table_output],
    )

# Launch the interface
interface.launch()

* Running on local URL:  http://127.0.0.1:7889

To create a public link, set `share=True` in `launch()`.






A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## Discount recomendation  

In [32]:
import pandas as pd
import numpy as np
from scipy.stats import mode
import gradio as gr

# Load dataset (Assuming a CSV file)
df = pd.read_csv('E:\\ArcGIS\\ACI Center\\Data analysis\\preprosess_df.csv')

# Select records with both discount and profit
df = df[(df["Discount"] > 0) & (df["Total_Profit"] > 0)]
df["new_Discount"] = df["Discount"] / df["Quantity"]
df["new_Quantity"] = 1


# Function to get the mode
def get_mode(x):
    m = mode(x, keepdims=True)  # Ensures the output is always an array
    return m.mode[0] if len(m.mode) > 0 else np.nan


# Group by Product_Code & Dealer_Code to compute discount statistics

discount_stats = (
    df.groupby(["Product_Code", "Dealer_Code"])
    .agg(
        mean_discount=("Discount", "mean"),
        min_discount=("Discount", "min"),
        # Use the filtered DataFrame to find max discount for Quantity == 1
        min_quantity=(
            "Discount",
            lambda x: (
                df.loc[x.index, "Quantity"][df.loc[x.index, "Discount"] == x.max()]
            ).min(),
        ),
        max_discount=("Discount", "max"),
        median_discount=("Discount", "median"),
        mode_discount=("Discount", get_mode),
        weighted_discount=(
            "Discount",
            lambda x: (
                np.average(x, weights=df.loc[x.index, "new_Quantity"])
                if df.loc[x.index, "new_Quantity"].sum() > 0
                else np.nan
            ),
        ),
    )
    .reset_index()
)

# Calculate discount variability (standard deviation)
discount_stats["discount_std"] = (
    df.groupby(["Product_Code", "Dealer_Code"])["Discount"].std().values
)


# Function to determine the best discount selection
def select_best_discount(row):
    if row["discount_std"] < 2:
        return row["mean_discount"]  # Use Mean if discount is stable
    else:
        return row["weighted_discount"]  # Use Weighted Avg if discount varies a lot


# Apply selection rule
discount_stats["final_discount"] = discount_stats.apply(select_best_discount, axis=1)

# Get unique product codes and dealer codes for dropdown suggestions
unique_product_codes = discount_stats["Product_Code"].astype(str).unique().tolist()
unique_dealer_codes = discount_stats["Dealer_Code"].astype(str).unique().tolist()


# Function to display the discount table
def show_table(product_code_input, dealer_code_input):
    ###
    df_for_one_prod=discount_stats[discount_stats['Product_Code'] == product_code_input]
    # Define the specific row to show first
    specific_row = df_for_one_prod[df_for_one_prod['Dealer_Code'] == dealer_code_input]

    # Define related rows (excluding the specific row)
    related_rows = df_for_one_prod[df_for_one_prod['Dealer_Code'] != dealer_code_input]

    # Concatenate specific row with related rows
    df_reordered = pd.concat([specific_row, related_rows])
    if df_reordered.shape[0]>50:
        return df_reordered[
        [
            "Product_Code",
            "Dealer_Code",
            "min_discount",
            "min_quantity",
            "max_discount",
            "final_discount",
        ]
       ]
    else:
        return df_reordered[
            [
                "Product_Code",
                "Dealer_Code",
                "min_discount",
                "min_quantity",
                "max_discount",
                "final_discount",
            ]
        ]


# Function to retrieve the discount for a given product and dealer
def get_discount(product_code, dealer_code_input):
    product_code = product_code.upper()
    dealer_code_input = dealer_code_input.upper()

    if (product_code in discount_stats["Product_Code"].values) & (
        dealer_code_input in discount_stats["Dealer_Code"].values
    ):
        result = discount_stats.loc[
            (discount_stats["Product_Code"] == product_code)
            & (discount_stats["Dealer_Code"] == dealer_code_input),
            "final_discount",
        ]
        return float(result.iloc[0]) if not result.empty else "Product Code not found"
    else:
        return 0.0


# Create a Gradio Blocks interface
with gr.Blocks() as demo:
    gr.Markdown("# Discount Analysis Web Interface")

    # Row for inputs and discount output
    with gr.Row():
        product_code_input = gr.Dropdown(
            label="Search for Product Code",
            choices=unique_product_codes,
            interactive=True,
        )

        dealer_code_input = gr.Dropdown(
            label="Search for Dealer Code",
            choices=unique_dealer_codes,
            interactive=True,
        )

        discount_output = gr.Textbox(label="Final Discount", interactive=False)

        fetch_button = gr.Button("Get Discount")

        fetch_button.click(
            get_discount,
            inputs=[product_code_input, dealer_code_input],
            outputs=discount_output,
        )

    # Button to display the full discount table
    table_button = gr.Button("Show Discount Table")

    # Create an empty DataFrame component to display the discount table
    discount_table = gr.DataFrame()

    table_button.click(show_table,inputs=[product_code_input, dealer_code_input], outputs=discount_table)

# Launch the Gradio interface
demo.launch()

* Running on local URL:  http://127.0.0.1:7890

To create a public link, set `share=True` in `launch()`.


