In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import numpy as np

In [2]:
# Shared Plotly font settings; the two variants differ only in the font family.
font_dict_WOL = {"family": "Vazir FD-WOL", "weight": "bold", "size": 13}
font_dict_R = {"family": "Vazir", "weight": "bold", "size": 13}

# Default figure size passed to every chart in this notebook.
global_width, global_height = 1000, 500

In [3]:
# Load the dataset from the working directory, using the first CSV column as the index.
df = pd.read_csv("Bank_Churn.csv", index_col=0)

----------------------------------------- Step 1 -----------------------------------------

In [None]:
def draw_histogram(
    df, col, title, xtitle, ytitle, width, height,
    method="FD",  # choose binning method
    bargap=0.1, marker_color='skyblue',
    marker_line_color='black', marker_line_width=0.5, opacity=0.8,
    format_k_labels=False, save=False, scale=2
):
    """Show (and optionally save) a histogram of ``df[col]`` with range-labelled bins.

    Null values in the column are dropped. The bin width is estimated with the
    chosen rule, rounded to a whole number (at least 1), and bins start at the
    smallest observed value. Each bar gets a tick label of the form
    ``"<left edge> - <next edge minus 1>"``.

    Parameters
    ----------
    df : DataFrame holding the column to plot.
    col : Column name; also used as the prefix of the saved file name.
    title, xtitle, ytitle : Figure title and axis titles.
    width, height : Figure size, also used for the exported image.
    method : Bin-width rule, one of ``'FD'`` (Freedman-Diaconis), ``'Scott'``,
        ``'Sturges'`` or ``'sqrt'``.
    bargap, marker_color, marker_line_color, marker_line_width, opacity :
        Bar styling forwarded to Plotly.
    format_k_labels : If True, label values >= 1000 are written as ``"<n>k"``
        (integer thousands, truncated).
    save : If True, write the figure to ``"{col}_Histogeram.jpg"``.
    scale : Scale factor passed to the image export.

    Raises
    ------
    ValueError
        If the column has no non-null values, or ``method`` is not recognised.
    """
    data = df[col].dropna().values
    n = len(data)
    if n == 0:
        # Fail with a clear message instead of numpy's empty-reduction error.
        raise ValueError(f"Column '{col}' has no non-null values to plot")

    data_min = data.min()
    data_max = data.max()

    # --- Bin width calculation methods ---
    if method == "FD":  # Freedman–Diaconis
        q25, q75 = np.percentile(data, [25, 75])
        iqr = q75 - q25
        bin_width = 2 * iqr / (n ** (1/3))
    elif method == "Scott":
        bin_width = 3.5 * np.std(data) / (n ** (1/3))
    elif method == "Sturges":
        k = 1 + np.log2(n)
        bin_width = (data_max - data_min) / k
    elif method == "sqrt":
        k = int(np.sqrt(n))
        bin_width = (data_max - data_min) / k
    else:
        raise ValueError("Method must be one of: 'FD', 'Scott', 'Sturges', 'sqrt'")

    # A zero spread estimate (e.g. IQR of 0) falls back to a Sturges-style width.
    if bin_width == 0:
        bin_width = (data_max - data_min) / (1 + np.log2(n))

    # Round to nearest whole number (minimum 1)
    bin_width = max(1, int(round(bin_width)))

    # Bin edges. Plotly puts a value lying exactly on an edge into the bin that
    # starts there, so the last edge must be strictly greater than data_max;
    # otherwise the bar holding the maximum would be drawn without a tick label.
    n_bins = int(np.floor((data_max - data_min) / bin_width)) + 1
    bins = data_min + bin_width * np.arange(n_bins + 1)

    # Function to format numbers as "k" if >=1000
    def format_k(x):
        if x >= 1000:
            return f"{int(x/1000)}k"
        return str(int(x))

    # One label per bin, e.g. "350 - 365". The upper bound is written as the next
    # edge minus 1, which reads as an inclusive range for integer-valued data.
    label_of = format_k if format_k_labels else (lambda x: str(int(x)))
    tick_labels = [
        f"{label_of(left)} - {label_of(right - 1)}"
        for left, right in zip(bins[:-1], bins[1:])
    ]

    tick_positions = bins[:-1] + bin_width/2  # position in the middle of each bin

    # Draw Plotly histogram on exactly the labelled bins
    fig = go.Figure(
        go.Histogram(
            x=data,
            xbins=dict(start=bins[0], end=bins[-1], size=bin_width)
        )
    )
    fig.update_layout(
        title=dict(text=title, x=0.5, xanchor='center'),
        xaxis=dict(
            title=xtitle,
            tickmode='array',
            tickvals=tick_positions,
            ticktext=tick_labels
        ),
        yaxis=dict(title=ytitle),
        width=width,
        height=height,
        bargap=bargap,
        font=font_dict_WOL
    )
    fig.update_traces(
        marker_color=marker_color,
        marker_line_color=marker_line_color,
        marker_line_width=marker_line_width,
        opacity=opacity
    )
    if save:
        # File-name spelling is kept as-is so existing output paths do not change.
        pio.write_image(fig, f"{col}_Histogeram.jpg", width=width, height=height, scale=scale)
    fig.show()

In [38]:
def draw_pie_chart(
    df, col, title, width, height,
    custom_colors=None, custom_labels=None,
    save=False, scale=2
):
    """Show (and optionally save) a pie chart of how often each value of ``df[col]`` occurs.

    Parameters
    ----------
    df : DataFrame holding the column to summarise.
    col : Column name; also used as the prefix of the saved file name.
    title : Figure title, centred above the chart.
    width, height : Figure size, also used for the exported image.
    custom_colors : Optional colour sequence for the slices; a built-in
        ten-colour palette is used when omitted.
    custom_labels : Optional mapping from raw category values to display labels.
    save : If True, write the figure to ``"{col}_Piechart.jpg"``.
    scale : Scale factor passed to the image export.
    """
    # Fall back to the default palette when the caller supplies no colours.
    palette = custom_colors
    if palette is None:
        palette = [
            "#636EFA",  # blue
            "#FF6692",  # pink
            "#00CC96",  # green
            "#FECB52",  # yellow
            "#AB63FA",  # purple
            "#FFA15A",  # orange
            "#19D3F3",  # teal
            "#B6E880",  # light green
            "#FF97FF",  # magenta
            "#EF553B",  # red-orange
        ]

    # Frequency table: one row per category with its number of occurrences.
    freq = df[col].value_counts().reset_index()
    freq.columns = [col, 'Count']

    # Swap raw category values for their display labels, if any were given.
    if custom_labels is not None:
        freq[col] = freq[col].replace(custom_labels)

    fig = px.pie(
        freq,
        names=col,
        values="Count",
        title=title,
        color_discrete_sequence=palette,
    )

    # Each slice shows its percentage together with its label.
    fig.update_traces(textinfo="percent+label")

    # Sizing, shared font and a centred title.
    centred_title = dict(text=title, x=0.5, xanchor="center")
    fig.update_layout(
        width=width,
        height=height,
        font=font_dict_WOL,
        title=centred_title,
    )

    if save:
        out_path = f"{col}_Piechart.jpg"
        pio.write_image(fig, out_path, width=width, height=height, scale=scale)
    fig.show()



In [39]:
def draw_bar(
    df, col, title, xtitle, ytitle, width, height,
    marker_color='skyblue',
    marker_line_color='black', marker_line_width=0.5, opacity=0.8,
    horizontal=False, show_labels=True, save=False, scale=2
):
    """Show (and optionally save) a bar chart of how often each value of ``df[col]`` occurs.

    Parameters
    ----------
    df : DataFrame holding the column to summarise.
    col : Column name; also used as the prefix of the saved file name.
    title : Figure title, centred above the chart.
    xtitle, ytitle : Titles of the x and y axes as drawn (they are not swapped
        when ``horizontal`` is True).
    width, height : Figure size, also used for the exported image.
    marker_color, marker_line_color, marker_line_width, opacity :
        Bar styling forwarded to Plotly.
    horizontal : If True, counts run along the x axis and categories along y.
    show_labels : If True, each bar is annotated with its count.
    save : If True, write the figure to ``"{col}_Barchart.jpg"``.
    scale : Scale factor passed to the image export.
    """
    # Count unique values
    counts = df[col].value_counts().reset_index()
    counts.columns = [col, 'Count']

    # Sort values descending
    counts = counts.sort_values('Count', ascending=False)

    text_col = 'Count' if show_labels else None

    # One tick per category value, placed on whichever axis carries the
    # categories. Putting these tick values on the count axis (as happened for
    # horizontal charts before) would replace the numeric count ticks.
    category_ticks = dict(tickmode='array', tickvals=counts[col])

    # Choose orientation
    if horizontal:
        fig = px.bar(counts, x='Count', y=col, orientation='h', text=text_col)
        xaxis = dict(title=xtitle)
        yaxis = dict(title=ytitle, **category_ticks)
    else:
        fig = px.bar(counts, x=col, y='Count', text=text_col)
        xaxis = dict(title=xtitle, **category_ticks)
        yaxis = dict(title=ytitle)

    # Shared font, sizing and centred title
    fig.update_layout(
        xaxis=xaxis,
        yaxis=yaxis,
        width=width,
        height=height,
        font=font_dict_WOL,
        title=dict(text=title, x=0.5, xanchor='center')
    )

    # Bar styling; textposition only matters when count labels are shown
    fig.update_traces(
        textposition='auto',
        marker_color=marker_color,
        marker_line_color=marker_line_color,
        marker_line_width=marker_line_width,
        opacity=opacity
    )

    if save:
        pio.write_image(fig, f"{col}_Barchart.jpg", width=width, height=height, scale=scale)

    fig.show()

In [None]:
# Credit-score distribution, binned with Scott's rule; save=True also exports the figure.
col = "CreditScore"
title = "توزیع مشتریان بر اساس امتیاز اعتباری"
xtitle = "امتیاز اعتباری"
ytitle = "تعداد مشتریان"
width = global_width
# Spelled `height` (was `hight`) to match the other cells and avoid a second stale global.
height = global_height
draw_histogram(df, col, title, xtitle, ytitle, width, height,
               method='Scott', save=True)

In [None]:
# Pie chart of the customer share per country, with translated display labels.
col = "Geography"
title = "سهم هر کشور از کل مشتریان"
width, height = global_width, global_height
custom_colors = ["#19D3F3", "#FFA15A", "#636EFA"]
custom_labels = {
    "France": "فرانسه",
    "Germany": "آلمان",
    "Spain": "اسپانیا",
}
draw_pie_chart(
    df=df, col=col, title=title, width=width, height=height,
    custom_colors=custom_colors,
    custom_labels=custom_labels,
    save=True,
)

In [None]:
# Pie chart of the customer share per gender; no custom colours are passed here.
col = "Gender"
title = "سهم جنسیت ها از کل مشتریان"
width, height = global_width, global_height
custom_labels = {
    "Male": "مرد",
    "Female": "زن",
}
draw_pie_chart(
    df=df, col=col, title=title, width=width, height=height,
    custom_labels=custom_labels,
    save=True,
)

In [None]:
# Age distribution, binned with Sturges' rule; save=True also exports the figure.
col = "Age"
title = "توزیع مشتریان بر اساس سن"
xtitle = "سن"
ytitle = "تعداد مشتریان"
width = global_width
# Spelled `height` (was `hight`) to match the other cells and avoid a second stale global.
height = global_height
draw_histogram(df, col, title, xtitle, ytitle, width, height,
               method='Sturges', save=True)

In [None]:
# Bar chart of customers per tenure value, drawn without count labels on the bars.
col = "Tenure"
title = "توزیع مشتریان بر اساس تعداد سال های عضویت مشتری"
xtitle = "تعداد سال های عضویت"
ytitle = "تعداد مشتریان"
width, height = global_width, global_height
draw_bar(
    df=df, col=col, title=title, xtitle=xtitle, ytitle=ytitle,
    width=width, height=height,
    show_labels=False,
    save=True,
)

In [None]:
# Account-balance distribution, binned with Scott's rule; tick labels use "k" for thousands.
col = "Balance"
title = "توزیع مشتریان بر اساس میزان موجودی حساب"
xtitle = "میزان موجودی حساب"
ytitle = "تعداد مشتریان"
width = global_width
# Spelled `height` (was `hight`) to match the other cells and avoid a second stale global.
height = global_height
draw_histogram(df, col, title, xtitle, ytitle, width, height,
               method='Scott', format_k_labels=True, save=True)

In [None]:
# Pie chart of the customer share per number of bank products held.
col = "NumOfProducts"
title = "نسبت مشتریان بر اساس تعداد محصولات بانکی"
width, height = global_width, global_height
custom_colors = ["#19D3F3", "#FFA15A", "#636EFA", "#AB63FA"]
custom_labels = {
    1: "یک محصول",
    2: "دو محصول",
    3: "سه محصول",
    4: "چهار محصول",
}
draw_pie_chart(
    df=df, col=col, title=title, width=width, height=height,
    custom_colors=custom_colors,
    custom_labels=custom_labels,
    save=True,
)

In [None]:
# Pie chart of customers with versus without a credit card.
col = "HasCrCard"
title = "نسبت مشتریان دارای کارت اعتباری"
width, height = global_width, global_height
custom_colors = ["#00CC96", "#FFA15A"]
custom_labels = {
    1: "دارای کارت",
    0: "بدون کارت",
}
draw_pie_chart(
    df=df, col=col, title=title, width=width, height=height,
    custom_colors=custom_colors,
    custom_labels=custom_labels,
    save=True,
)

In [None]:
# Pie chart of active versus inactive customers.
col = "IsActiveMember"
title = "نسبت مشتریان فعال"
width, height = global_width, global_height
custom_colors = ["#00CC96", "#FFA15A"]
custom_labels = {
    1: "فعال",
    0: "غیرفعال",
}
draw_pie_chart(
    df=df, col=col, title=title, width=width, height=height,
    custom_colors=custom_colors,
    custom_labels=custom_labels,
    save=True,
)

In [None]:
# Pie chart of customers who stayed versus customers who churned.
col = "Exited"
title = "نسبت مشتریان ریزش پیدا کرده"
width, height = global_width, global_height
custom_colors = ["#00CC96", "#EF553B"]
custom_labels = {
    0: "باقی مانده",
    1: "ریزش پیدا کرده",
}
draw_pie_chart(
    df=df, col=col, title=title, width=width, height=height,
    custom_colors=custom_colors,
    custom_labels=custom_labels,
    save=True,
)