In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from ipysankeywidget import SankeyWidget
from IPython.display import display


from IPython.display import HTML
def View(df):
    """Render a DataFrame as an HTML table inside a pop-up browser window.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to show; it is converted with ``df.to_html()``.

    Returns
    -------
    IPython.display.HTML
        An HTML object holding a ``<script>`` that opens the window and writes
        the table plus its stylesheet into it. Displaying the returned object
        in the notebook is what triggers the pop-up.
    """
    import json  # local import: only needed to build the JavaScript literal

    css = """<style>
    table { border-collapse: collapse; border: 3px solid #eee; }
    table tr th:first-child { background-color: #eeeeee; color: #333; font-weight: bold }
    table thead th { background-color: #eee; color: #000; }
    tr, th, td { border: 1px solid #ccc; border-width: 1px 0 0 1px; border-collapse: collapse;
    padding: 3px; font-family: monospace; font-size: 10px }</style>
    """
    # json.dumps yields a valid JavaScript string literal (quotes, backslashes
    # and newlines are escaped), so apostrophes in cell values or column names
    # can no longer terminate the string early. "</" is additionally escaped so
    # the payload can never close the surrounding <script> element.
    payload = json.dumps(df.to_html() + css).replace("</", "<\\/")

    s  = '<script type="text/Javascript">'
    s += 'var win = window.open("", "Title", "toolbar=no, location=no, directories=no, status=no, menubar=no, scrollbars=yes, resizable=yes, width=780, height=200, top="+(screen.height-400)+", left="+(screen.width-840));'
    s += 'win.document.body.innerHTML = ' + payload + ';'
    s += '</script>'
    return HTML(s + css)

# Read the input tables (paths are relative to the notebook's working directory).
# perc          - sales list report; passed to the plots below as the current-season data
# chair_mapping - seat table; preprocess_sales_data counts its 'seat_id' per 'section_name'
# st_2425       - list report used below as the previous-season (24/25) comparison
# st_2324       - another list report; NOTE(review): not referenced in the cells visible here
perc = pd.read_csv('List_Report_20250713091309.csv')
chair_mapping = pd.read_excel('chair_mapping_20240801.xlsx')
st_2425 = pd.read_csv('List_Report_20250609080140.csv')
st_2324 = pd.read_csv('List_Report_20250606154459.csv')

In [2]:
def preprocess_sales_data(df, chair_mapping, return_extras=False):
    """Normalize a raw sales list report into the schema used by the plots.

    Steps, each applied only when the relevant column exists after renaming:
    rename report headers to snake_case names, coerce ``price`` to numeric
    (invalid or missing values become 0), keep only rows whose ``st_status``
    is ``'Active'``, map raw area labels to normalized area codes, split
    ``sale_date`` into date / ``HH:MM`` hour / weekday name, and flag rows
    whose ``purchaser_email`` contains "hapoel".

    Parameters
    ----------
    df : pandas.DataFrame
        Raw report. It is not modified; a renamed copy is processed.
    chair_mapping : pandas.DataFrame
        Seat table with ``section_name`` and ``seat_id`` columns. Only read
        when ``return_extras`` is true.
    return_extras : bool, default False
        When true, also return the per-section capacity table and the area
        presentation order.

    Returns
    -------
    pandas.DataFrame
        The processed frame, when ``return_extras`` is false.
    tuple (DataFrame, DataFrame, dict)
        ``(processed, capacity, presentation_order)`` when ``return_extras``
        is true. ``capacity`` has columns ``section_name`` and
        ``area_capacity``; ``presentation_order`` maps area code -> rank.
    """
    rename_columns = {
        "Birth date": 'birth date',
        'City': "city",
        'Gender': 'gender',
        'Type': 'status',
        'Date': 'sale_date',
        'Time': 'sale_hour',
        'Base price': 'base_price',
        'Price': 'price',
        'Ticket price types': 'price_type',
        'Stand': 'location_name',
        'Price area': 'price_category',
        'User Id': 'client_number',
        'Area': 'area',
        'Row': 'row',
        'assign using email or ID': "tz",
        'Number': 'seat',
        'Card number': 'card_number',
        'Transaction': 'transaction',
        'Cashier': 'purchaser_email',
        'Co-owner': 'co_purchaser_email',
        'Pelecard_PayType_VisaInstallments': 'payment_type',
        'Voucher number': 'voucher_type',
        'Status': 'st_status'
    }

    # Raw area label -> normalized area code. Labels without an entry are kept as-is.
    area_mapper = {
        'אולם 1 עמידה': 'Court_1S', '1C': 'Court_1C', 'אולם 2': 'Court_2', 'אולם 3': 'Court_3', 'אולם 4': 'Court_4',
        'אולם 5': 'Court_5', 'אולם 6': 'Court_6', 'אולם 7': 'Court_7', 'אולם 8': 'Court_8',
        'אולם 9': 'Court_9', 'אולם 10': 'Court_10', 'אולם 11': 'Court_11', '12 אולם': 'Court_12',
        'גלריה 1': 'Gallery_1', 'גלריה 2': 'Gallery_2', '3 גלריה': 'Gallery_3', 'גלריה 4': 'Gallery_4',
        '5 גלריה': 'Gallery_5', 'גלריה 6': 'Gallery_6', 'גלריה 7': 'Gallery_7', 'גלריה 8': 'Gallery_8',
        'גלריה 9': 'Gallery_9', 'גלריה 10': 'Gallery_10', 'גלריה 11': 'Gallery_11', 'גלריה 12': 'Gallery_12',
        'פרקט מרכזי': 'Court_side_17', 'פרקט צפוני': 'Court_side_18', 'פרקט דרומי': 'Court_side_19',
        'תא 1': 'Suite_1', 'תא 2': 'Suite_2', 'תא 3': 'Suite_3', 'תא 4': 'Suite_4', 'תא 5': 'Suite_5',
        'תא 6': 'Suite_6', 'תא 7': 'Suite_7', 'תא 8': 'Suite_8', 'תא 9': 'Suite_9', 'תא 10': 'Suite_10',
        'תא 11': 'Suite_11', 'תא 12': 'Suite_12', 'תא 13': 'Suite_13', 'תא 14': 'Suite_14',
        'תא 15': 'Suite_15', 'תא 16': 'Suite_16'
    }

    # Display order of the areas; an area's rank is its position in this list
    # (Court_1S = 0 ... Unmarked_12 = 46).
    ordered_areas = (
        ['Court_1S', 'Court_1C']
        + [f'Court_{n}' for n in range(2, 13)]
        + [f'Gallery_{n}' for n in range(1, 13)]
        + [f'Court_side_{n}' for n in (17, 18, 19)]
        + [f'Suite_{n}' for n in range(1, 17)]
        + ['Suite_21', 'Unmarked_6', 'Unmarked_12']
    )
    presentation_order = {area: rank for rank, area in enumerate(ordered_areas)}

    # rename() returns a new frame, so the caller's DataFrame is left untouched.
    df = df.rename(columns=rename_columns)

    # Coerce price to numeric. A report without a price column gets an all-zero
    # column; calling to_numeric on a scalar default would return a scalar,
    # which has no .fillna.
    if 'price' in df.columns:
        df['price'] = pd.to_numeric(df['price'], errors='coerce').fillna(0)
    else:
        df['price'] = 0

    # Keep only active rows when the report carries a status column.
    if 'st_status' in df.columns:
        df = df[df['st_status'] == 'Active'].reset_index(drop=True)

    if 'area' in df.columns:
        df['area'] = df['area'].replace(area_mapper)

    if 'sale_date' in df.columns:
        # Unparseable values become NaT and propagate as missing to the derived columns.
        df['full_datetime'] = pd.to_datetime(df['sale_date'], errors='coerce')

        df['sale_date'] = df['full_datetime'].dt.date        # date part only
        # Overwrites any 'sale_hour' that came from the report's 'Time' column.
        df['sale_hour'] = df['full_datetime'].dt.strftime('%H:%M')
        df['weekday_sale'] = df['full_datetime'].dt.day_name()

    # Flag rows whose purchaser e-mail contains "hapoel" in one of three casings.
    if 'purchaser_email' in df.columns:
        df['hapoels_purchaser'] = df['purchaser_email'].str.contains('hapoel|Hapoel|HAPOEL', na=False)

    if not return_extras:
        return df

    # Number of non-null seat ids per section.
    capacity = (
        chair_mapping.groupby('section_name')['seat_id']
        .count()
        .reset_index(name='area_capacity')
    )
    return df, capacity, presentation_order


In [3]:
# Preprocess both reports. The first call also returns the per-section seat
# capacity and the area presentation order; the names perc / st_2425 are
# rebound to the processed frames.
perc, capacity, presentation_order = preprocess_sales_data(perc, chair_mapping, return_extras=True)
st_2425 = preprocess_sales_data(st_2425, chair_mapping, return_extras=False)

  df['full_datetime'] = pd.to_datetime(df['sale_date'], errors='coerce')


In [4]:
# Raw area label -> normalized area code. This repeats the mapping defined
# inside preprocess_sales_data; it is passed to distributed_by_area, which
# inverts it to label the x-axis with the raw names.
area_mapper = {
    'אולם 1 עמידה': 'Court_1S', '1C': 'Court_1C', 'אולם 2': 'Court_2', 'אולם 3': 'Court_3', 'אולם 4': 'Court_4',
    'אולם 5': 'Court_5', 'אולם 6': 'Court_6', 'אולם 7': 'Court_7', 'אולם 8': 'Court_8',
    'אולם 9': 'Court_9', 'אולם 10': 'Court_10', 'אולם 11': 'Court_11', '12 אולם': 'Court_12',
    'גלריה 1': 'Gallery_1', 'גלריה 2': 'Gallery_2', '3 גלריה': 'Gallery_3', 'גלריה 4': 'Gallery_4',
    '5 גלריה': 'Gallery_5', 'גלריה 6': 'Gallery_6', 'גלריה 7': 'Gallery_7', 'גלריה 8': 'Gallery_8',
    'גלריה 9': 'Gallery_9', 'גלריה 10': 'Gallery_10', 'גלריה 11': 'Gallery_11', 'גלריה 12': 'Gallery_12',
    'פרקט מרכזי': 'Court_side_17', 'פרקט צפוני': 'Court_side_18', 'פרקט דרומי': 'Court_side_19',
    'תא 1': 'Suite_1', 'תא 2': 'Suite_2', 'תא 3': 'Suite_3', 'תא 4': 'Suite_4', 'תא 5': 'Suite_5',
    'תא 6': 'Suite_6', 'תא 7': 'Suite_7', 'תא 8': 'Suite_8', 'תא 9': 'Suite_9', 'תא 10': 'Suite_10',
    'תא 11': 'Suite_11', 'תא 12': 'Suite_12', 'תא 13': 'Suite_13', 'תא 14': 'Suite_14',
    'תא 15': 'Suite_15', 'תא 16': 'Suite_16'
    }

In [5]:
def distributed_by_area(perc, presentation_order, st_2425, area_mapper):
    """Plot per-area season-ticket sales as stacked bars and return the table behind the plot.

    For every area the chart stacks renewals, new buyers and the remaining gap
    to last year's paid-ticket count, annotates the sold percentage above the
    bars and the summed price below the axis.

    Parameters
    ----------
    perc : pandas.DataFrame
        Current sales rows; reads ``area``, ``client_number`` and ``price``.
        The caller's frame is not modified.
    presentation_order : dict
        Area code -> sort rank used to order the bars.
    st_2425 : pandas.DataFrame
        Comparison (last year) rows; reads ``area``, ``client_number`` and ``price``.
    area_mapper : dict
        Raw area label -> area code; inverted to label the x-axis.

    Returns
    -------
    pandas.DataFrame
        One row per plotted area with the counts, ratios, labels and hover texts.
    """
    stats_by_Court = _area_sales_stats(perc, presentation_order, st_2425, area_mapper)

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=stats_by_Court['area_he'],
        y=stats_by_Court['Season_Tickets_Renew'],
        name='Renewals',
        marker_color='red',
        hovertext=stats_by_Court['Season_Tickets_Renew_hover_text'],
        hoverinfo='text'
    ))

    fig.add_trace(go.Bar(
        x=stats_by_Court['area_he'],
        y=stats_by_Court['Season_Tickets_New'],
        name='New',
        marker_color='tomato',
        hovertext=stats_by_Court['Season_Tickets_New_hover_text'],
        hoverinfo='text'
    ))

    # The remaining-capacity bar starts where the sold tickets end.
    fig.add_trace(go.Bar(
        x=stats_by_Court['area_he'],
        y=stats_by_Court['fixed_remaining_capacity'],
        name='Remaining Capacity',
        marker_color='grey',
        base=stats_by_Court['area_velocity'],
        hovertext=stats_by_Court['remaining_capacity_hover_text'],
        hoverinfo='text'
    ))

    fig.update_layout(
        title=dict(text='Season Ticket Distribution by Section', font=dict(size=16)),
        xaxis=dict(title='', tickangle=45, title_font=dict(size=14), tickfont=dict(size=10), constrain='domain'),
        yaxis=dict(title='Tickets', title_font=dict(size=14), tickfont=dict(size=12), range=[0, None]),
        barmode='stack',
        bargap=0.3,
        legend=dict(
            x=0.02,  # near the left edge
            y=0.99,  # top
            bordercolor="Black",
            borderwidth=1,
            font=dict(size=10)
        ),
        margin=dict(l=20, r=20, t=40, b=80),
        width=1200,
        height=600,
        plot_bgcolor='white',
        paper_bgcolor='white',
        title_x=0.25
    )

    # Sold-percentage label, placed 20 units above the renew + new total.
    for area_he, pct, renewed, new in zip(stats_by_Court['area_he'], stats_by_Court['ratio_capacity_percent'],
                                          stats_by_Court['Season_Tickets_Renew'], stats_by_Court['Season_Tickets_New']):
        fig.add_annotation(
            x=area_he,
            y=renewed + new + 20,
            text=f"{int(pct)}%",
            showarrow=False,
            font=dict(size=12, color="black"),
            align='center'
        )

    # Revenue label under each bar.
    for area_he, revenue in zip(stats_by_Court['area_he'], stats_by_Court['Total_Price']):
        fig.add_annotation(
            x=area_he,
            y=0,
            text=f"₪{int(revenue):,}",
            showarrow=False,
            yshift=-30,
            font=dict(size=12, color="blue"),
            textangle=45
        )

    fig.add_annotation(
        text="Percentages represent total sold / total from last year<br>Gray reference total capacity of each section",
        xref="paper", yref="paper", x=0.5, y=-0.2,
        showarrow=False, font=dict(size=14, color="grey"), align="center"
    )

    fig.show()

    return stats_by_Court


def _area_sales_stats(perc, presentation_order, st_2425, area_mapper):
    """Build the per-area renewals / new / remaining table plotted by distributed_by_area."""
    # NOTE(review): this list mixes raw labels with one mapped code. If 'area'
    # was already mapped to codes, only 'Gallery_12' can match - confirm
    # whether Gallery_2 / Gallery_6 are meant to be excluded as well.
    excluded_areas = ['גלריה 2', 'גלריה 6', 'Gallery_12']
    # .copy() so the columns added below go to an independent frame
    # (no SettingWithCopyWarning).
    perc = perc[~perc['area'].isin(excluded_areas)].copy()
    st_2425 = st_2425[~st_2425['area'].isin(excluded_areas)]

    # Last year's paid tickets per area.
    last_year_tickets = (
        st_2425[st_2425['price'] > 0]
        .groupby('area')
        .size()
        .reset_index(name='last_year_tickets')
    )

    # A row is a renewal when its client also appears in last year's data.
    perc['is_renew'] = perc['client_number'].isin(st_2425['client_number'])
    perc['is_new'] = ~perc['is_renew']

    stats = (
        perc.groupby('area')
            .agg(
                Season_Tickets_Renew=('is_renew', 'sum'),
                Season_Tickets_New=('is_new', 'sum'),
                Total_Price=('price', 'sum')
            )
            .reset_index()
    )

    stats['area_velocity'] = stats[['Season_Tickets_Renew', 'Season_Tickets_New']].sum(axis=1)
    stats['presentation_order'] = stats['area'].map(presentation_order)
    stats = stats.sort_values(by='presentation_order')

    # Inner join: only areas that also had paid tickets last year are kept.
    stats = stats.merge(last_year_tickets, on='area', how='inner')
    stats = stats[stats['area_velocity'] > 0].copy()

    stats['remaining_capacity'] = stats['last_year_tickets'] - stats['area_velocity']
    stats['fixed_remaining_capacity'] = np.maximum(stats['remaining_capacity'], 0)
    stats['ratio_capacity'] = stats['area_velocity'] / stats['last_year_tickets']
    # Cap at 100% when more tickets were sold than last year.
    stats['ratio_capacity'] = np.minimum(stats['ratio_capacity'], 1).fillna(1)
    stats['ratio_capacity_percent'] = (stats['ratio_capacity'] * 100).round()

    stats['total_tickets'] = stats['area_velocity'] + stats['fixed_remaining_capacity']

    # Map area codes back to the raw labels for the x-axis; unmapped codes stay as they are.
    reverse_area_mapper = {code: label for label, code in area_mapper.items()}
    stats['area_he'] = stats['area'].map(reverse_area_mapper).fillna(stats['area'])

    # Pin these two labels regardless of what the supplied mapper contains.
    stats.loc[stats['area'] == 'Court_1S', 'area_he'] = 'אולם 1 עמידה'
    stats.loc[stats['area'] == 'Court_1C', 'area_he'] = '1C'

    def ticket_share_text(count_column):
        # A list comprehension (rather than a row-wise apply) also works when the table is empty.
        return [
            f"{count} Tickets<br>({count / total:.2%})"
            for count, total in zip(stats[count_column], stats['total_tickets'])
        ]

    stats['Season_Tickets_Renew_hover_text'] = ticket_share_text('Season_Tickets_Renew')
    stats['Season_Tickets_New_hover_text'] = ticket_share_text('Season_Tickets_New')
    stats['remaining_capacity_hover_text'] = ticket_share_text('fixed_remaining_capacity')

    return stats

# Draw the per-area chart; the returned stats table is the cell's displayed output.
distributed_by_area(perc, presentation_order, st_2425, area_mapper)


Unnamed: 0,area,Season_Tickets_Renew,Season_Tickets_New,Total_Price,area_velocity,presentation_order,last_year_tickets,remaining_capacity,fixed_remaining_capacity,ratio_capacity,ratio_capacity_percent,total_tickets,area_he,Season_Tickets_Renew_hover_text,Season_Tickets_New_hover_text,remaining_capacity_hover_text
0,Court_1C,50,2,59080,52,1.0,57,5,5,0.912281,91.0,57,1C,50 Tickets<br>(87.72%),2 Tickets<br>(3.51%),5 Tickets<br>(8.77%)
1,Court_2,242,22,412610,264,2.0,278,14,14,0.94964,95.0,278,אולם 2,242 Tickets<br>(87.05%),22 Tickets<br>(7.91%),14 Tickets<br>(5.04%)
2,Court_3,215,17,500871,232,3.0,267,35,35,0.868914,87.0,267,אולם 3,215 Tickets<br>(80.52%),17 Tickets<br>(6.37%),35 Tickets<br>(13.11%)
3,Court_4,128,30,233000,158,4.0,200,42,42,0.79,79.0,200,אולם 4,128 Tickets<br>(64.00%),30 Tickets<br>(15.00%),42 Tickets<br>(21.00%)
4,Court_5,151,7,178710,158,5.0,202,44,44,0.782178,78.0,202,אולם 5,151 Tickets<br>(74.75%),7 Tickets<br>(3.47%),44 Tickets<br>(21.78%)
5,Court_6,303,74,285595,377,6.0,404,27,27,0.933168,93.0,404,אולם 6,303 Tickets<br>(75.00%),74 Tickets<br>(18.32%),27 Tickets<br>(6.68%)
6,Court_7,25,9,40290,34,7.0,123,89,89,0.276423,28.0,123,אולם 7,25 Tickets<br>(20.33%),9 Tickets<br>(7.32%),89 Tickets<br>(72.36%)
7,Court_8,110,19,192570,129,8.0,169,40,40,0.763314,76.0,169,אולם 8,110 Tickets<br>(65.09%),19 Tickets<br>(11.24%),40 Tickets<br>(23.67%)
8,Court_9,51,3,222200,54,9.0,134,80,80,0.402985,40.0,134,אולם 9,51 Tickets<br>(38.06%),3 Tickets<br>(2.24%),80 Tickets<br>(59.70%)
9,Court_10,189,14,295750,203,10.0,212,9,9,0.957547,96.0,212,אולם 10,189 Tickets<br>(89.15%),14 Tickets<br>(6.60%),9 Tickets<br>(4.25%)


In [6]:
def plot_daily_sales_plotly(df, date_column, font_size=12, y_shift=8, start_date="2025-06-05"):
    """Show a bar chart of the number of rows (tickets) sold per calendar day.

    Parameters
    ----------
    df : pandas.DataFrame
        Sales rows; only ``date_column`` is read. The frame is not modified.
    date_column : str
        Column holding the sale date; non-datetime values are parsed with
        ``pd.to_datetime(errors='coerce')`` and unparseable ones are dropped.
    font_size : int, default 12
        Font size of the value labels above the bars.
    y_shift : int, default 8
        Accepted for backward compatibility; not used.
    start_date : str or datetime-like, default "2025-06-05"
        Only sales on or after this date are counted.
    """
    sale_times = df[date_column]
    if not pd.api.types.is_datetime64_any_dtype(sale_times):
        sale_times = pd.to_datetime(sale_times, errors='coerce')

    # NaT compares False here, so rows without a valid date drop out as well.
    sale_times = sale_times[sale_times >= pd.to_datetime(start_date)]

    # Count rows per calendar day (time of day is discarded).
    sale_days = sale_times.dt.date
    grouped = (
        sale_days.groupby(sale_days)
        .size()
        .rename_axis('date_only')
        .reset_index(name="Total_Sales")
        .sort_values('date_only')
    )

    # String labels so the x-axis can be treated as categorical.
    grouped['period_label'] = grouped['date_only'].astype(str)

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=grouped['period_label'],
        y=grouped['Total_Sales'],
        name='Daily Sales',
        marker_color='firebrick',
        text=[f"{val:,}" for val in grouped['Total_Sales']],
        textposition='outside',
        textfont=dict(size=font_size)
    ))

    fig.update_layout(
        title=dict(
            text='Daily Season Tickets Sold',
            font=dict(size=20),
            x=0.5
        ),
        xaxis=dict(
            title='Date',
            tickmode='array',
            tickvals=grouped['period_label'],
            ticktext=grouped['period_label'],
            tickangle=45,
            tickfont=dict(size=12),
            type='category'  # evenly spaced bars, no gaps for days without sales
        ),
        yaxis=dict(
            title='Sales',
            tickfont=dict(size=14),
            gridcolor='lightgrey',
            griddash='dot'
        ),
        bargap=0.1,
        height=750,
        width=1400,
        margin=dict(l=60, r=60, t=90, b=120),
        plot_bgcolor='white',
        paper_bgcolor='white',
        showlegend=False
    )

    fig.show()

# Daily ticket counts for the current report (y_shift is accepted but not used by the function).
plot_daily_sales_plotly(perc, date_column='sale_date', font_size=12, y_shift=10)

In [7]:
def plot_cumulative_sales_plotly(df, date_column, days=1, font_size=12, y_shift=8, start_date="2025-06-05"):
    """Show a bar chart of the running total of rows (tickets) sold per ``days``-day period.

    Parameters
    ----------
    df : pandas.DataFrame
        Sales rows; only ``date_column`` is read. The frame is not modified.
    date_column : str
        Column holding the sale date; non-datetime values are parsed with
        ``pd.to_datetime(errors='coerce')`` and unparseable ones are dropped.
    days : int, default 1
        Length of an aggregation period in days (7 = weekly).
    font_size : int, default 12
        Font size of the value labels above the bars.
    y_shift : int, default 8
        Accepted for backward compatibility; not used.
    start_date : str or datetime-like, default "2025-06-05"
        Only sales on or after this date are counted.

    Raises
    ------
    ValueError
        If ``days`` is smaller than 1.
    """
    if days < 1:
        # days == 0 used to produce an empty chart silently (modulo by zero -> missing periods).
        raise ValueError(f"days must be >= 1, got {days!r}")

    sale_times = df[date_column]
    if not pd.api.types.is_datetime64_any_dtype(sale_times):
        sale_times = pd.to_datetime(sale_times, errors='coerce')

    sale_times = sale_times[sale_times.notna()]
    sale_times = sale_times[sale_times >= pd.to_datetime(start_date)]

    # Each period starts on a date whose day-of-year is a multiple of `days`,
    # so the first label can predate start_date. With days == 1 the period is the day itself.
    sale_days = pd.to_datetime(sale_times.dt.date)
    period_start = (
        sale_days - pd.to_timedelta(sale_days.dt.dayofyear % days, unit='D')
    ).dt.date

    grouped = (
        period_start.groupby(period_start)
        .size()
        .rename_axis('n_day_period')
        .reset_index(name="Total_Sales")
        .sort_values('n_day_period')
    )
    grouped['period_label'] = grouped['n_day_period'].astype(str)

    grouped['Cumulative_Sales'] = grouped['Total_Sales'].cumsum()

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=grouped['period_label'],
        y=grouped['Cumulative_Sales'],
        name='Cumulative Sales',
        marker_color='firebrick',
        text=[f"{val:,}" for val in grouped['Cumulative_Sales']],
        textposition='outside',
        textfont=dict(size=font_size)
    ))

    fig.update_layout(
        title=dict(
            text=f'Cumulative Season Tickets Sold ({days}-Day Aggregated)',
            font=dict(size=20),
            x=0.5
        ),
        xaxis=dict(
            title='Date',
            tickmode='array',
            tickvals=grouped['period_label'],
            ticktext=grouped['period_label'],
            tickangle=45,
            tickfont=dict(size=12),
            type='category'  # evenly spaced bars, no gaps for empty periods
        ),
        yaxis=dict(
            title='Cumulative Season Tickets Sold',
            tickfont=dict(size=14),
            gridcolor='lightgrey',
            griddash='dot'
        ),
        bargap=0.1,
        height=750,
        width=1400,
        margin=dict(l=60, r=60, t=90, b=120),
        plot_bgcolor='white',
        paper_bgcolor='white',
        showlegend=False
    )

    fig.show()

# Cumulative ticket counts in 7-day periods (y_shift is accepted but not used by the function).
plot_cumulative_sales_plotly(perc, 'sale_date', days=7, font_size=12, y_shift=10)

In [8]:
def plot_daily_revenue_readable(df, font_size=12, y_shift=10, start_date='2025-06-05'):
    """Show a bar chart of daily revenue in thousands of shekels.

    Parameters
    ----------
    df : pandas.DataFrame
        Sales rows; reads ``sale_date`` and ``price``. The frame is not modified.
    font_size : int, default 12
        Font size of the value labels above the bars.
    y_shift : int, default 10
        Accepted for backward compatibility; not used.
    start_date : str or datetime-like, default '2025-06-05'
        Only days on or after this date are plotted.

    Notes
    -----
    Rows with a non-numeric or non-positive price are ignored, and days whose
    total is below 1,000 are left out of the chart.
    """
    sale_times = df['sale_date']
    if not pd.api.types.is_datetime64_any_dtype(sale_times):
        sale_times = pd.to_datetime(sale_times, errors='coerce')

    # Non-numeric prices become NaN, which the > 0 test rejects together with zero-price rows.
    prices = pd.to_numeric(df['price'], errors='coerce')
    is_paid = prices > 0

    # Sum revenue per calendar day; rows without a valid date fall out of the groupby.
    daily = (
        prices[is_paid].astype(float)
        .groupby(sale_times[is_paid].dt.date)
        .sum()
        .rename_axis('sale_date_only')
        .reset_index(name='Total_revenue')
        .sort_values('sale_date_only')
    )

    daily = daily[pd.to_datetime(daily['sale_date_only']) >= pd.to_datetime(start_date)]

    # Work on an independent frame before adding the derived columns.
    daily = daily.assign(Total_revenue_thousands=daily['Total_revenue'] / 1000)

    # Drop days with less than one thousand in revenue.
    daily = daily[daily['Total_revenue_thousands'] >= 1].copy()

    daily['period_label'] = pd.to_datetime(daily['sale_date_only']).dt.strftime('%d-%b-%Y')
    text_annotations = [f"{int(val):,}₪" for val in daily['Total_revenue_thousands']]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=daily['period_label'],
        y=daily['Total_revenue_thousands'],
        name='Daily Revenue',
        marker_color='firebrick',
        text=text_annotations,
        textposition='outside',
        textfont=dict(size=font_size)
    ))

    fig.update_layout(
        title=dict(text='Daily Income Revenue', font=dict(size=20), x=0.5),
        xaxis=dict(
            title='Date',
            tickangle=45,
            tickfont=dict(size=12)
        ),

        yaxis=dict(
            title='Revenue (Thousands of ₪)',
            tickfont=dict(size=14),
            gridcolor='lightgrey',
            griddash='dot'
        ),
        bargap=0.1,
        height=750,
        width=1400,
        margin=dict(l=60, r=60, t=90, b=120),
        plot_bgcolor='white',
        paper_bgcolor='white',
        showlegend=False
    )

    fig.show()


# Daily revenue for the current report (y_shift is accepted but not used by the function).
plot_daily_revenue_readable(perc, font_size=12, y_shift=10)

In [9]:
def plot_cumulative_revenue_plotly(df, date_column, revenue_column, days, font_size=12, y_shift=10, start_date="2025-06-05"):
    """Show a bar chart of running total revenue (in millions) per ``days``-day period.

    Parameters
    ----------
    df : pandas.DataFrame
        Sales rows; reads ``date_column`` and ``revenue_column``. The frame is
        not modified.
    date_column : str
        Column holding the sale date; non-datetime values are parsed with
        ``pd.to_datetime(errors='coerce')``.
    revenue_column : str
        Column holding the amount per row; non-numeric rows are ignored.
    days : int
        Length of an aggregation period in days.
    font_size : int, default 12
        Font size of the value labels above the bars.
    y_shift : int, default 10
        Accepted for backward compatibility; not used.
    start_date : str or datetime-like, default "2025-06-05"
        Only rows dated on or after this value are included.

    Raises
    ------
    ValueError
        If ``days`` is smaller than 1.
    """
    if days < 1:
        raise ValueError(f"days must be >= 1, got {days!r}")

    sale_times = df[date_column]
    if not pd.api.types.is_datetime64_any_dtype(sale_times):
        sale_times = pd.to_datetime(sale_times, errors='coerce')

    revenue = pd.to_numeric(df[revenue_column], errors='coerce')

    # Keep rows with a numeric amount dated on/after start_date (NaT compares False).
    keep = revenue.notna() & (sale_times >= pd.to_datetime(start_date))

    # Round each timestamp down to its n-day bucket. NOTE(review): fixed-frequency
    # flooring in pandas is not anchored at start_date, so bucket boundaries need
    # not coincide with it - confirm this is the intended alignment.
    period_start = sale_times[keep].dt.floor(f'{days}D')

    grouped = (
        revenue[keep].astype(float)
        .groupby(period_start)
        .sum()
        .rename_axis('n_day_period')
        .reset_index(name="Total_revenue")
        .sort_values('n_day_period')
    )

    grouped['Cumulative_Revenue'] = grouped['Total_revenue'].cumsum()
    grouped['Cumulative_Revenue_Million'] = (grouped['Cumulative_Revenue'] / 1_000_000).round(3)

    grouped['period_label'] = grouped['n_day_period'].dt.strftime('%Y-%m-%d')

    text_annotations = [f"{val}" for val in grouped['Cumulative_Revenue_Million']]

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=grouped['period_label'],
        y=grouped['Cumulative_Revenue_Million'],
        name='Cumulative Revenue',
        marker_color='firebrick',
        text=text_annotations,
        textposition='outside',
        textfont=dict(size=font_size)
    ))

    fig.update_layout(
        title=dict(
            text=f'Cumulative Revenue ({days}-Day Aggregated)',
            font=dict(size=20),
            x=0.5
        ),
        xaxis=dict(
            title='Date',
            tickangle=45,
            tickfont=dict(size=12),
            tickmode='array',
            tickvals=grouped['period_label'],
            ticktext=grouped['period_label'],
            type='category'  # evenly spaced bars, no gaps for empty periods
        ),
        yaxis=dict(
            title='Cumulative Revenue (Millions of ₪)',
            tickfont=dict(size=14),
            gridcolor='lightgrey',
            griddash='dot'
        ),
        bargap=0.1,
        height=750,
        width=1400,
        margin=dict(l=60, r=60, t=90, b=120),
        plot_bgcolor='white',
        paper_bgcolor='white',
        showlegend=False
    )

    fig.show()

# Cumulative revenue of the 'price' column in 7-day periods.
plot_cumulative_revenue_plotly(perc, 'sale_date', 'price', days=7)

In [10]:
def plot_renewals_pie_chart(df, st_df, client_col='client_number', column='is_renew'):
    """
    Draw a donut chart splitting this season's tickets into renewals and new buyers.

    A row of `df` counts as a renewal when its `client_col` value also occurs
    in `st_df[client_col]`; every other row counts as new.

    Parameters:
    - df: DataFrame with this season's ticket rows (left unmodified).
    - st_df: DataFrame with last season's rows, used as the renewal reference.
    - client_col: Name of the client-identifier column present in both frames.
    - column: Name for the renewal flag; kept for interface compatibility.
    """
    # Boolean flag per row: True for returning clients.
    renewal_flag = df[client_col].isin(st_df[client_col]).rename(column)

    renew_count = int(renewal_flag.sum())
    new_count = int(len(renewal_flag) - renew_count)

    donut = go.Pie(
        labels=[f'Renewal ({renew_count})', f'New ({new_count})'],
        values=[renew_count, new_count],
        marker=dict(colors=['red', 'skyblue']),
        textinfo='label+percent',
        insidetextorientation='radial',
        hole=0.3  # donut chart
    )

    fig = go.Figure(data=[donut])
    fig.update_layout(
        title=dict(text='Season Tickets: Renewals vs New', font=dict(size=20), x=0.5),
        height=500,
        width=500,
        margin=dict(t=60, b=60, l=60, r=60),
        showlegend=False
    )
    fig.show()

# Renewals vs. new buyers: current report compared against the 24/25 report.
plot_renewals_pie_chart(perc, st_2425)

In [11]:
import plotly.express as px

def plot_season_tickets_by_price_type(df):
    """
    Plot a horizontal bar chart of ticket counts per price type, ordered from
    most popular to least.

    Parameters:
    - df: DataFrame with a 'price_type' column (left unmodified).

    Price types are normalized before counting: the ' (upgrade)' suffix is
    removed, the business label is translated to 'Business', and any value
    containing 'VIP' or the youth-department label collapses to 'VIP' /
    'Youth Department'. Missing values are counted under the string 'nan'.
    """
    # Work on the one column that is needed instead of copying the whole frame.
    price_types = (
        df['price_type']
        .astype(str)
        .str.replace(' (upgrade)', '', regex=False)
        .replace({'עסקי': 'Business'})
        .replace(to_replace=r'.*VIP.*', value='VIP', regex=True)
        .replace(to_replace=r'.*מחלקת נוער.*', value='Youth Department', regex=True)
    )

    grouped = (
        price_types
        .value_counts()
        .rename_axis('price_type')
        .reset_index(name='ticket_count')
    )

    # Fixed colors for the known price types.
    color_map = {
        'Complementary': 'gray',
        'Business': 'orange',
        'Box': 'brown',
        'VIP': 'purple',
        'Community': 'seagreen',
        'Youth Department': 'navy',
        'Child': 'black',
        'Adult': 'red',
        'Playoff finals - Adult': 'firebrick',
        'Playoff finals - Child': 'skyblue'
    }

    # Fallback palette for price types without a fixed color.
    all_colors = px.colors.qualitative.Set3 + px.colors.qualitative.Pastel + px.colors.qualitative.Bold

    # Cycle through the palette so every unknown type gets a color even when
    # there are more unknown types than palette entries.
    missing_types = [ptype for ptype in grouped['price_type'] if ptype not in color_map]
    dynamic_colors = {
        ptype: all_colors[position % len(all_colors)]
        for position, ptype in enumerate(missing_types)
    }

    full_color_map = {**color_map, **dynamic_colors}
    grouped['color'] = grouped['price_type'].map(full_color_map)

    fig = go.Figure(go.Bar(
        x=grouped['ticket_count'],
        y=grouped['price_type'],
        orientation='h',
        marker_color=grouped['color'],
        text=grouped['ticket_count'],
        textposition='outside',
        textfont=dict(size=14)
    ))

    fig.update_layout(
        title=dict(text='Season Tickets By Price Type', font=dict(size=24), x=0.5),
        yaxis=dict(
            title='',
            tickfont=dict(size=14),
            categoryorder='total ascending'  # largest bar ends up at the top
        ),
        height=600,
        width=950,
        margin=dict(l=120, r=60, t=80, b=60),
        plot_bgcolor='white',
        paper_bgcolor='white',
        showlegend=False
    )

    fig.show()

# Ticket counts per price type for the current report.
plot_season_tickets_by_price_type(perc)

In [12]:
def plot_purchase_method_pie(df, column='co_purchaser_email'):
    """
    Draw a donut chart of how tickets were bought.

    A row is counted as 'Independently' when `column` is missing for it and as
    'Telemarketing' otherwise. The input frame is left unmodified.
    """
    # Rows without a value in `column` vs. all remaining rows.
    independent_total = int(df[column].isna().sum())
    telemarketing_total = int(len(df) - independent_total)

    totals = {
        'Independently': independent_total,
        'Telemarketing': telemarketing_total,
    }
    slice_labels = [f"{method} ({count})" for method, count in totals.items()]

    donut = go.Pie(
        labels=slice_labels,
        values=list(totals.values()),
        marker=dict(colors=['red', 'skyblue']),
        hole=0.4,
        textinfo='label+percent',
        textposition='inside',
        insidetextorientation='radial',
        textfont=dict(size=16, color='white')
    )

    fig = go.Figure(data=[donut])
    fig.update_layout(
        title=dict(text='How Is A Season Ticket Purchased?', x=0.5, font=dict(size=20)),
        height=400,
        width=500,
        margin=dict(t=60, b=40, l=40, r=40),
        showlegend=False
    )
    fig.show()

# Purchase-method split for the current report, based on the default 'co_purchaser_email' column.
plot_purchase_method_pie(perc)

In [13]:
def plot_two_column_price_transition_sankey(st_2425, perc, min_value=5):
    """
    Draw a two-column Sankey diagram of price-category transitions.

    Rows of `st_2425` (left column, 24/25) and `perc` (right column, 25/26)
    are inner-joined on 'client_number'. Every (old category, new category)
    pair is counted and pairs with fewer than `min_value` rows are dropped.
    The figure is shown, followed by a table of the surviving pair counts.

    Parameters:
        st_2425: DataFrame with 'client_number' and 'price_category' columns.
        perc: DataFrame with 'client_number' and 'price_category' columns.
        min_value: minimum pair count for a transition to be drawn.

    Returns:
        None. When no transition reaches `min_value`, a message is printed
        and nothing is plotted.
    """
    # The inner join already restricts `perc` to clients present in `st_2425`,
    # so no separate "is renew" flag (and no copy of `perc`) is needed.
    # NOTE(review): a client with several rows in both tables yields one merged
    # row per combination, so 'value' counts row pairs rather than distinct
    # clients — confirm this is the intended metric.
    left = st_2425[['client_number', 'price_category']].rename(
        columns={'price_category': 'price_2425'}
    )
    right = perc[['client_number', 'price_category']].rename(
        columns={'price_category': 'price_2526'}
    )
    transitions = (
        left.merge(right, on='client_number', how='inner')
        .dropna()[['price_2425', 'price_2526']]
    )

    # Count transitions and keep only the sufficiently frequent ones
    pair_counts = (
        transitions.groupby(['price_2425', 'price_2526'])
        .size()
        .reset_index(name='value')
    )
    pair_counts = pair_counts[pair_counts['value'] >= min_value]

    if pair_counts.empty:
        print("No transitions meet the minimum value.")
        return

    # Same label set on both sides: node i is the 24/25 instance of a label,
    # node n_labels + i is its 25/26 instance.
    used_labels = sorted(set(pair_counts['price_2425']) | set(pair_counts['price_2526']))
    n_labels = len(used_labels)
    all_labels = used_labels + used_labels
    position = {label: idx for idx, label in enumerate(used_labels)}

    sources = pair_counts['price_2425'].map(position).astype(int)
    targets = (pair_counts['price_2526'].map(position) + n_labels).astype(int)
    values = pair_counts['value'].values

    # astype(str) keeps the concatenation valid for non-string categories
    hover_labels = (
        pair_counts['value'].astype(str) + " clients moved from " +
        pair_counts['price_2425'].astype(str) + " (24/25) to " +
        pair_counts['price_2526'].astype(str) + " (25/26)"
    )

    # Node positions: left column at x=0.01, right column at x=0.99,
    # both using the same evenly spaced y coordinates
    y_step = 1 / (n_labels + 1)
    column_y = [(i + 1) * y_step for i in range(n_labels)]
    node_x = [0.01] * n_labels + [0.99] * n_labels
    node_y = column_y + column_y

    # Link colour is chosen by the source (24/25) category
    color_map = {
        'A': 'rgba(255, 105, 180, 0.6)',
        'B': 'rgba(70, 130, 180, 0.6)',
        'C': 'rgba(220, 20, 60, 0.6)',
        'D': 'rgba(144, 238, 144, 0.6)',
        'E': 'rgba(255, 165, 0, 0.6)',
        'F': 'rgba(255, 140, 0, 0.6)',
        'Silver': 'rgba(135, 206, 250, 0.6)',
        'Gold': 'rgba(34, 139, 34, 0.6)',
        'Court_side': 'rgba(255, 0, 0, 0.6)',
    }
    link_colors = [
        color_map.get(src, 'rgba(200, 100, 100, 0.6)')
        for src in pair_counts['price_2425']
    ]

    # Sankey diagram
    fig = go.Figure(data=[go.Sankey(
        arrangement="snap",
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=all_labels,
            color="lightgray",
            x=node_x,
            y=node_y
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values,
            color=link_colors,
            customdata=hover_labels,
            hovertemplate='%{customdata}<extra></extra>',
        )
    )])

    fig.update_layout(
        title_text="Season Ticket Price Category Transitions",
        title_x=0.5,
        font_size=14,
        width=1100,
        height=600
    )

    fig.show()

    # Show the summary table
    display(pair_counts.rename(columns={
        'price_2425': 'From (24/25)',
        'price_2526': 'To (25/26)',
        'value': 'Number of Clients'
    }))

# Usage: transitions from the 24/25 table (st_2425) to the 25/26 table (perc), with the default min_value
plot_two_column_price_transition_sankey(st_2425, perc)

Unnamed: 0,From (24/25),To (25/26),Number of Clients
0,A,A,378
3,A,Gold,11
4,A,Silver,5
5,B,A,5
6,B,B,522
7,B,C,6
9,B,F,8
10,B,Silver,6
12,C,B,23
13,C,C,405


In [14]:
# Prepare cumulative revenue with active selling days
def prepare_cumulative_revenue(data, opening_date, date_column, amount_column, exclude_dates=None):
    """
    Aggregate revenue per selling day and accumulate it over active days only.

    Parameters:
        data: DataFrame holding `date_column` and `amount_column`.
        opening_date: rows dated before this value are discarded.
        date_column: name of the sale-date column (parsed with pd.to_datetime).
        amount_column: name of the column that is summed per day.
        exclude_dates: optional collection of datetime.date values to drop.

    Returns:
        DataFrame with one row per remaining calendar date, holding:
        `date_column` (datetime.date objects), 'daily_revenue',
        'Days Since Opening' (1, 2, 3, ... over the days that have rows)
        and 'Cumulative Revenue'. The input frame is not modified.
    """
    sales = data.copy()
    sales[date_column] = pd.to_datetime(sales[date_column])

    # Build one keep-mask: on/after opening and not on an excluded date
    keep = sales[date_column] >= opening_date
    if exclude_dates is not None:
        keep &= ~sales[date_column].dt.date.isin(exclude_dates)
    sales = sales[keep]

    # One row per calendar date with the summed amount
    per_day = sales.groupby(sales[date_column].dt.date)[amount_column].sum()
    per_day = per_day.reset_index()
    per_day = per_day.rename(columns={amount_column: 'daily_revenue'})
    per_day = per_day.sort_values(date_column).reset_index(drop=True)

    # Days are numbered consecutively, so dates without sales leave no gap
    per_day['Days Since Opening'] = range(1, len(per_day) + 1)
    per_day['Cumulative Revenue'] = per_day['daily_revenue'].cumsum()

    return per_day

In [28]:
def plot_cumulative_revenue_comparison(st_2425, perc):
    """
    Plot cumulative revenue per active selling day for seasons 24/25 and 25/26.

    Both curves are built by `prepare_cumulative_revenue` from the
    'sale_date' and 'price' columns, so the x axis counts days that have
    sales since each season's opening date, not calendar days. A dashed
    vertical line marks the selling day on which the 25/26 early-bird period
    ended, and the last 25/26 selling day is annotated on both curves.

    Parameters:
        st_2425: 24/25 sales with 'sale_date' and 'price' columns.
        perc: 25/26 sales with 'sale_date' and 'price' columns.

    Returns:
        None. The figure is shown via fig.show().
    """
    OPENING_DATE_2425 = pd.to_datetime("2024-07-03")
    OPENING_DATE_2526 = pd.to_datetime("2025-06-05")
    EARLY_BIRD_END_DATE = pd.to_datetime("2025-07-11")
    # Day number used for the early-bird line when that date has no sales row
    EARLY_BIRD_FALLBACK_DAY = 27

    # Dates excluded from the 24/25 curve.
    # NOTE(review): these dates are in July 2025 while the 24/25 data opens on
    # 2024-07-03 — confirm the intended year and the season they apply to.
    war_dates = [d.date() for d in pd.date_range("2025-07-13", "2025-07-24")]

    # Legend counts: all 24/25 rows, and 25/26 rows sold on/after the opening date
    st_count_2425 = st_2425.shape[0]
    sale_dates_2526 = pd.to_datetime(perc["sale_date"])
    st_count_2526 = int((sale_dates_2526 >= OPENING_DATE_2526).sum())

    # Prepare data with war dates excluded for 24/25
    daily_revenue_2425 = prepare_cumulative_revenue(
        st_2425,
        OPENING_DATE_2425,
        date_column="sale_date",
        amount_column="price",
        exclude_dates=war_dates
    )

    daily_revenue_2526 = prepare_cumulative_revenue(
        perc,
        OPENING_DATE_2526,
        date_column="sale_date",
        amount_column="price"
    )

    fig = go.Figure()

    # Trace for 24/25
    fig.add_trace(go.Scatter(
        x=daily_revenue_2425['Days Since Opening'],
        y=daily_revenue_2425['Cumulative Revenue'],
        mode='lines+markers',
        name=f'Season 24/25 ({st_count_2425:,} ST)',
        line=dict(color='black', width=2),
        marker=dict(size=5)
    ))

    # Trace for 25/26
    fig.add_trace(go.Scatter(
        x=daily_revenue_2526['Days Since Opening'],
        y=daily_revenue_2526['Cumulative Revenue'],
        mode='lines+markers',
        name=f'Season 25/26 ({st_count_2526:,} ST)',
        line=dict(color='red', width=2),
        marker=dict(size=5)
    ))

    # Early Bird End line (11/07/2025).
    # prepare_cumulative_revenue stores datetime.date objects in 'sale_date',
    # so the lookup must use a date: comparing against a Timestamp never
    # matches on pandas >= 2.0 and would always trigger the fallback.
    early_bird_row = daily_revenue_2526.loc[
        daily_revenue_2526['sale_date'] == EARLY_BIRD_END_DATE.date()
    ]

    if not early_bird_row.empty:
        early_bird_end_day_number = early_bird_row['Days Since Opening'].values[0]
    else:
        # Fallback if the date is not in the data
        early_bird_end_day_number = EARLY_BIRD_FALLBACK_DAY

    fig.update_layout(
        shapes=[
            dict(
                type="line",
                xref="x",
                yref="paper",
                x0=early_bird_end_day_number,
                y0=0,
                x1=early_bird_end_day_number,
                y1=1,
                line=dict(color="blue", width=2, dash="dash"),
                layer="below"
            )
        ],
        annotations=[
            dict(
                x=early_bird_end_day_number,
                y=1,
                xref="x",
                yref="paper",
                text="Early Bird End",
                showarrow=False,
                font=dict(color="blue"),
                xanchor="left",
                yanchor="bottom"
            )
        ]
    )

    # Annotations for the last day of 25/26 (skipped when there is no 25/26 data,
    # which previously raised IndexError)
    if not daily_revenue_2526.empty:
        last_day = daily_revenue_2526['Days Since Opening'].max()
        last_rev_2526 = daily_revenue_2526.loc[
            daily_revenue_2526['Days Since Opening'] == last_day, 'Cumulative Revenue'
        ].values[0]
        # 24/25 may not have reached that day number; then it is not annotated
        rev_2425_at_day = daily_revenue_2425.loc[
            daily_revenue_2425['Days Since Opening'] == last_day, 'Cumulative Revenue'
        ]
        last_rev_2425 = rev_2425_at_day.values[0] if not rev_2425_at_day.empty else None

        fig.add_annotation(
            x=last_day,
            y=last_rev_2526,
            text=f'Day {last_day}<br>{last_rev_2526:,.0f}₪',
            showarrow=True,
            arrowhead=2,
            ax=30,
            ay=-40,
            font=dict(color='red'),
            arrowcolor='red'
        )

        if last_rev_2425 is not None:
            fig.add_annotation(
                x=last_day,
                y=last_rev_2425,
                text=f'Day {last_day}<br>{last_rev_2425:,.0f}₪',
                showarrow=True,
                arrowhead=2,
                ax=25,
                ay=-90,
                font=dict(color='black'),
                arrowcolor='black'
            )

    # Layout
    fig.update_layout(
        title=dict(
            text='Cumulative Revenue Over Time',
            x=0.5,
            font=dict(size=20)
        ),
        xaxis=dict(
            title='Days Since the Season Ticket Renewals Period Opening',
            tickfont=dict(size=12),
            tick0=1,
            dtick=7,
            tickangle=45,
            range=[
                1,
                max(
                    daily_revenue_2425['Days Since Opening'].max(),
                    daily_revenue_2526['Days Since Opening'].max()
                ) + 1
            ]
        ),
        yaxis=dict(
            title='Cumulative Revenue',
            tickfont=dict(size=12),
            zeroline=False,
            gridcolor='lightgray',
        ),
        width=1000,
        height=600,
        plot_bgcolor='white',
        paper_bgcolor='white',
        legend=dict(font=dict(size=12)),
    )

    fig.show()

# Compare the cumulative revenue of season 24/25 (st_2425) with season 25/26 (perc)
plot_cumulative_revenue_comparison(st_2425, perc)

In [16]:
def prepare_cumulative_count(
    df,
    open_date=None,
    days_limit=160,
    exclude_dates=None
):
    """
    Build per-day and cumulative row counts over active selling days.

    The sale date is read from 'Date.1' when that column exists, otherwise
    from 'sale_date'. For 'Date.1', the first non-null value decides the
    format: if it contains '/', the column is parsed as DD/MM/YYYY.
    Unparseable values are dropped, as is anything before 2023-06-25.

    Parameters:
        df: DataFrame with a 'Date.1' or 'sale_date' column. Not modified.
        open_date: first date to keep; defaults to the earliest remaining date.
        days_limit: accepted for backward compatibility; currently not applied.
        exclude_dates: optional collection of datetime.date values to drop.

    Returns:
        DataFrame with one row per remaining calendar date and the columns
        'Date' (datetime.date), 'daily_count', 'Days Since Opening'
        (1, 2, 3, ... over days that have rows) and 'Cumulative Count'.

    Raises:
        ValueError: if no row survives the date filters (including the case
            where every date is missing or unparseable).
    """
    # Earliest sale date considered valid for any season handled here
    earliest_valid = pd.Timestamp('2023-06-25')

    if 'Date.1' in df.columns:
        raw_dates = df['Date.1']
        present = raw_dates.dropna()
        # Guard the format sniffing: an all-null column has no sample to inspect
        if not present.empty and '/' in str(present.iloc[0]):
            # st_2324 style: DD/MM/YYYY
            parsed = pd.to_datetime(raw_dates, format='%d/%m/%Y', errors='coerce')
        else:
            parsed = pd.to_datetime(raw_dates, errors='coerce')
    else:
        parsed = pd.to_datetime(df['sale_date'], errors='coerce')

    # Drop failed parses, then everything before the earliest valid date
    dates = parsed.dropna()
    dates = dates[dates >= earliest_valid]

    # Set the open date and remove rows before it
    if open_date is None:
        open_date = dates.min()
    dates = dates[dates >= open_date]

    # Exclude specific dates if provided
    if exclude_dates is not None:
        dates = dates[~dates.dt.date.isin(exclude_dates)]

    if dates.empty:
        raise ValueError("No data remaining after applying date filters.")

    # Count rows per calendar date
    sale_days = dates.dt.date.rename('Date')
    daily_counts = (
        sale_days.groupby(sale_days)
        .size()
        .reset_index(name='daily_count')
        .sort_values('Date')
        .reset_index(drop=True)
    )

    # Consecutive numbering: dates without rows do not consume a day number
    daily_counts['Days Since Opening'] = range(1, len(daily_counts) + 1)

    # Cumulative sum
    daily_counts['Cumulative Count'] = daily_counts['daily_count'].cumsum()

    return daily_counts


In [33]:
def plot_cumulative_count_comparison(st_2324, st_2425, perc):
    """
    Plot cumulative season-ticket counts per active selling day for three seasons.

    Each curve comes from `prepare_cumulative_count`, so the x axis counts
    days that have sales since the season's opening date. A dashed vertical
    line marks the selling day on which the 25/26 early-bird period ended,
    and every season that reached the last 25/26 day number is annotated
    with its count on that day.

    Legend counts: 23/24 and 24/25 use the raw row count of their tables,
    25/26 uses the final cumulative count after date filtering.

    Parameters:
        st_2324: 23/24 sales table.
        st_2425: 24/25 sales table.
        perc: 25/26 sales table.

    Returns:
        None. The figure is shown via fig.show().
    """
    OPENING_DATE_2324 = pd.to_datetime("2023-06-26")
    OPENING_DATE_2425 = pd.to_datetime("2024-07-03")
    OPENING_DATE_2526 = pd.to_datetime("2025-06-05")

    # Prepare data (each season is aggregated exactly once)
    daily_count_2324 = prepare_cumulative_count(st_2324, OPENING_DATE_2324)
    daily_count_2425 = prepare_cumulative_count(st_2425, OPENING_DATE_2425)
    daily_count_2526 = prepare_cumulative_count(perc, OPENING_DATE_2526)

    st_count_2324 = st_2324.shape[0]
    st_count_2425 = st_2425.shape[0]
    st_count_2526 = daily_count_2526['Cumulative Count'].iloc[-1]

    # (label, daily frame, colour, legend count, last-day annotation placement)
    # dx/dy shift the annotated point, ax/ay shift the arrow tail in pixels.
    seasons = [
        ('23/24', daily_count_2324, 'gray', st_count_2324,
         dict(dx=0, dy=0, ax=-50, ay=10)),
        ('24/25', daily_count_2425, 'black', st_count_2425,
         dict(dx=0, dy=0, ax=100, ay=-20)),
        ('25/26', daily_count_2526, 'red', st_count_2526,
         dict(dx=0.6, dy=100, ax=25, ay=-40)),
    ]

    fig = go.Figure()

    for label, daily, color, legend_count, _ in seasons:
        fig.add_trace(go.Scatter(
            x=daily['Days Since Opening'],
            y=daily['Cumulative Count'],
            mode='lines+markers',
            name=f'Season {label} ({legend_count:,} ST)',
            line=dict(color=color, width=2),
            marker=dict(size=5)
        ))

    # Early Bird END line (11/7/2025); 'Date' holds datetime.date objects
    early_bird_end_date = pd.to_datetime("2025-07-11")
    early_bird_end_row = daily_count_2526.loc[
        daily_count_2526['Date'] == early_bird_end_date.date(), 'Days Since Opening'
    ]
    if not early_bird_end_row.empty:
        early_bird_end_day_number = early_bird_end_row.values[0]
    else:
        # Date has no sales row: force the Early Bird line to day 27
        early_bird_end_day_number = 27

    fig.add_vline(
        x=early_bird_end_day_number,
        line=dict(color="blue", width=2, dash="dash"),
        annotation_text="Early Bird End",
        annotation_position="top right",
        annotation_font_color="blue",
    )

    # Annotate every season at the last 25/26 day number, if it reached that day
    day_to_annotate = daily_count_2526['Days Since Opening'].max()
    for _, daily, color, _, place in seasons:
        day_count = daily.loc[
            daily['Days Since Opening'] == day_to_annotate, 'Cumulative Count'
        ]
        if day_count.empty:
            continue

        count_at_day = day_count.values[0]
        fig.add_annotation(
            x=day_to_annotate + place['dx'],
            y=count_at_day + place['dy'],
            text=f'Day {day_to_annotate}<br>{count_at_day:,} ST',
            showarrow=True,
            arrowhead=2,
            ax=place['ax'],
            ay=place['ay'],
            font=dict(color=color),
            arrowcolor=color
        )

    fig.update_layout(
        title=dict(
            text='Cumulative Season Ticket Count Over Time',
            x=0.5,
            font=dict(size=20)
        ),
        xaxis=dict(
            title='Days Since the Season Ticket Renewals Period Opening',
            tickmode='linear',
            tick0=1,
            dtick=7,
            tickangle=0,
            tickfont=dict(size=12),
            range=[1, max(
                daily_count_2324['Days Since Opening'].max(),
                daily_count_2425['Days Since Opening'].max(),
                daily_count_2526['Days Since Opening'].max()
            ) + 1]
        ),
        yaxis=dict(
            title='Cumulative Count',
            tickfont=dict(size=12),
            zeroline=False,
            gridcolor='lightgray',
        ),
        width=1000,
        height=600,
        plot_bgcolor='white',
        paper_bgcolor='white',
        legend=dict(font=dict(size=12)),
    )

    fig.show()

# Usage example: overlay the cumulative ticket counts of seasons 23/24, 24/25 and 25/26
plot_cumulative_count_comparison(st_2324, st_2425, perc)

# Old implementation (without the 23/24 season), kept commented out in the following cells for reference.

In [18]:
# def prepare_cumulative_count(df, open_date, days_limit=160):
#     df = df.copy()
#     df['Date'] = pd.to_datetime(df['sale_date'])
#     df = df[df['Date'] >= open_date]

#     count = (
#         df.groupby('Date')
#         .size()
#         .reindex(pd.date_range(df['Date'].min(), df['Date'].max()), fill_value=0)
#         .rename_axis('Date')
#         .reset_index(name='daily_count')
#     )

#     count['Days Since Opening'] = (count['Date'] - open_date).dt.days + 1
#     count = count[count['Days Since Opening'] < days_limit]
#     count['Cumulative Count'] = count['daily_count'].cumsum()

#     return count

In [19]:
# def plot_cumulative_count_comparison(st_2425, perc):
#     OPENING_DATE_2425 = pd.to_datetime("2024-07-03")
#     OPENING_DATE_2526 = pd.to_datetime("2025-06-05")

#     st_count_2425 = st_2425.shape[0]
#     st_count_2526 = perc.shape[0]

#     # Prepare data
#     daily_count_2425 = prepare_cumulative_count(st_2425, OPENING_DATE_2425)
#     daily_count_2526 = prepare_cumulative_count(perc, OPENING_DATE_2526)

#     fig = go.Figure()

#     # Trace for 24/25
#     fig.add_trace(go.Scatter(
#         x=daily_count_2425['Days Since Opening'],
#         y=daily_count_2425['Cumulative Count'],
#         mode='lines+markers',
#         name=f'Season 24/25 ({st_count_2425:,} ST)',
#         line=dict(color='black', width=2),
#         marker=dict(size=5)
#     ))

#     # Trace for 25/26
#     fig.add_trace(go.Scatter(
#         x=daily_count_2526['Days Since Opening'],
#         y=daily_count_2526['Cumulative Count'],
#         mode='lines+markers',
#         name=f'Season 25/26 ({st_count_2526:,} ST)',
#         line=dict(color='red', width=2),
#         marker=dict(size=5)
#     ))

#     # Add annotations
#     last_day = daily_count_2526['Days Since Opening'].max()
#     last_count_2526 = daily_count_2526.loc[daily_count_2526['Days Since Opening'] == last_day, 'Cumulative Count'].values[0]
#     last_count_2425 = daily_count_2425.loc[daily_count_2425['Days Since Opening'] == last_day, 'Cumulative Count']
#     last_count_2425 = last_count_2425.values[0] if not last_count_2425.empty else None

#     fig.add_annotation(
#         x=last_day,
#         y=last_count_2526,
#         text=f'Day {last_day}<br>{last_count_2526:,} ST',
#         showarrow=True,
#         arrowhead=2,
#         ax=20,
#         ay=-40,
#         font=dict(color='red'),
#         arrowcolor='red'
#     )

#     if last_count_2425 is not None:
#         fig.add_annotation(
#             x=last_day,
#             y=last_count_2425,
#             text=f'Day {last_day}<br>{last_count_2425:,} ST',
#             showarrow=True,
#             arrowhead=2,
#             ax=60,
#             ay=-90,
#             font=dict(color='black'),
#             arrowcolor='black'
#         )

#     fig.update_layout(
#         title=dict(
#             text='Cumulative Season Ticket Count Over Time',
#             x=0.5,
#             font=dict(size=20)
#         ),
#         xaxis=dict(
#             title='Days Since the Season Ticket Renewals Period Opening',
#             tickfont=dict(size=12),
#             tick0=1,
#             dtick=7,
#             tickangle=45,
#             range=[1, max(daily_count_2425['Days Since Opening'].max(), daily_count_2526['Days Since Opening'].max()) + 1]
#         ),
#         yaxis=dict(
#             title='Cumulative Count',
#             tickfont=dict(size=12),
#             zeroline=False,
#             gridcolor='lightgray',
#         ),
#         width=1000,
#         height=600,
#         plot_bgcolor='white',
#         paper_bgcolor='white',
#         legend=dict(font=dict(size=12)),
#     )

#     fig.show()

# # Usage
# plot_cumulative_count_comparison(st_2425, perc)

## Impact of Games Attended on Season Ticket (ST) Renewal

In [20]:
# Load the attendance summary; the next cell reads its 'User Id' and 'Games Attended' columns
df = pd.read_excel('summary_df.xlsx')

In [21]:
# Renewal rate by number of games attended.
# Reads the notebook-level `df` (attendance summary) and `perc` (25/26 sales);
# adds a 'Renewed' column to `df` and casts `perc['client_number']` to int in place.

# Convert User Id and client_number to int so the ids compare equal
df['User Id'] = df['User Id'].astype(int)
perc['client_number'] = perc['client_number'].astype(int)

# Make sure Games Attended is numeric (unparseable values become NaN)
df['Games Attended'] = pd.to_numeric(df['Games Attended'], errors='coerce')

# A member counts as renewed when the id appears in the 25/26 sales
renewed_ids = set(perc['client_number'])
df['Renewed'] = df['User Id'].isin(renewed_ids)

# Max games attended (to define the range)
max_games_attended = int(df['Games Attended'].max())

# One bin per attendance value: 0, 1, 2, ..., max_games_attended
attendance_bins = list(range(0, max_games_attended + 1))

# Count total ST members per number of games attended (missing bins -> 0)
total_counts = df['Games Attended'].value_counts().reindex(attendance_bins, fill_value=0)

# Count renewed ST members per number of games attended (missing bins -> 0)
renewed_counts = df[df['Renewed']]['Games Attended'].value_counts().reindex(attendance_bins, fill_value=0)

# Renewal rate in percent, one decimal; bins without members give 0/0 = NaN
renewal_rates = (renewed_counts / total_counts * 100).round(1)

# Bar labels (only counts, no percentages)
total_texts = total_counts.astype(str)
renewed_texts = renewed_counts.astype(str)

# Line labels: leave bins without members unlabeled instead of printing "nan%"
renewal_rate_texts = [f"{r}%" if pd.notna(r) else "" for r in renewal_rates]

# Plot with Plotly
fig = go.Figure()

# Light red bars - total ST members
fig.add_trace(go.Bar(
    x=attendance_bins,
    y=total_counts,
    name='ST Members',
    marker_color='#ff9999',
    text=total_texts,
    textposition='outside',
    yaxis='y1'
))

# Dark red bars - renewed ST members
fig.add_trace(go.Bar(
    x=attendance_bins,
    y=renewed_counts,
    name='Renewed Members',
    marker_color='#cc0000',
    text=renewed_texts,
    textposition='outside',
    yaxis='y1'
))

# Renewal rate line, drawn against the right-hand percentage axis
fig.add_trace(go.Scatter(
    x=attendance_bins,
    y=renewal_rates,
    name='Renewal Rate (%)',
    mode='lines+markers+text',
    text=renewal_rate_texts,
    textposition='top center',
    line=dict(color='black', width=2),
    marker=dict(size=8),
    yaxis='y2'
))

# Layout: counts on the left axis, renewal rate (0-100 %) on the right axis
fig.update_layout(
    title='Impact of Games Attended on ST Renewal',
    xaxis_title='Games Attended',
    yaxis=dict(
        title='Number of ST Members',
        side='left'
    ),
    yaxis2=dict(
        title='Renewal Rate (%)',
        overlaying='y',
        side='right',
        range=[0, 100]
    ),
    xaxis=dict(
        tickmode='linear',
        tick0=0,
        dtick=1
    ),
    barmode='group',
    bargap=0.15,
    bargroupgap=0.1,
    template='simple_white',
    legend=dict(x=0.7, y=1.15, orientation='h', bgcolor='rgba(255,255,255,0)'),
    height=500  # explicit figure height in pixels
)

fig.show()