In [21]:
# Install and import packages
!pip install pandas openpyxl matplotlib plotly xlsxwriter -q

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from google.colab import files
from IPython.display import display, HTML
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 100)
print("Setup complete!")

Setup complete!


In [22]:
# Upload your categorized Excel file
print("Please upload: categorized_orders_clean.xlsx")
uploaded = files.upload()
filename = list(uploaded.keys())[0]
print(f"\nUploaded: {filename}")

Please upload: categorized_orders_clean.xlsx


Saving categorized_orders_clean.xlsx to categorized_orders_clean (1).xlsx

Uploaded: categorized_orders_clean (1).xlsx


In [23]:
# Load and prepare data
df = pd.read_excel(filename)
print(f"Loaded {len(df)} rows")

# Get order column (first column)
order_col = df.columns[0]

# Filter to parent rows only (rows with order number)
parent_df = df[df[order_col].notna()].copy()
print(f"{len(parent_df)} unique orders")

# Parse dates
if 'Order Date' in parent_df.columns:
    parent_df['Order_Date_Parsed'] = pd.to_datetime(parent_df['Order Date'], errors='coerce')
if 'Void Date' in parent_df.columns:
    parent_df['Void_Date_Parsed'] = pd.to_datetime(parent_df['Void Date'], errors='coerce')
if 'Order Time' in parent_df.columns:
    parent_df['Order_Time_Parsed'] = pd.to_datetime(parent_df['Order Time'], errors='coerce')

# Calculate time gap
if 'Order_Time_Parsed' in parent_df.columns and 'Void_Date_Parsed' in parent_df.columns:
    parent_df['Time_Gap_Hours'] = (parent_df['Void_Date_Parsed'] - parent_df['Order_Time_Parsed']).dt.total_seconds() / 3600

# Standard display columns
display_cols = [order_col, 'Outlet', 'Order Type', 'Order Date', 'Reason', 'Void By ', 'Amount']
avail_cols = [c for c in display_cols if c in parent_df.columns]

print(f"\nColumns: {parent_df.columns.tolist()}")

Loaded 2482 rows
713 unique orders

Columns: ['Order No', 'Outlet', 'Order Type', 'Order Date', 'Contact no', 'Amount', 'Reason', 'Remark', 'Void By ', 'Void Date', 'Placed By', 'Order Time', 'Item', 'Qty', 'Rate', 'Predicted_Category', 'Extracted_New_Bill', 'Order_Date_Parsed', 'Void_Date_Parsed', 'Order_Time_Parsed', 'Time_Gap_Hours']


---
## 1. High-Value Void Bills (Suspicious Large Amounts)
Orders with amounts above the 95th percentile - unusually high values that need scrutiny

In [24]:
# Flag 1: High value voids (above 95th percentile)
amount_threshold = parent_df['Amount'].quantile(0.95)
high_value_voids = parent_df[parent_df['Amount'] >= amount_threshold].sort_values('Amount', ascending=False).copy()

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>High-Value Void Bills</h3>
    <p><b>Threshold:</b> Rs. {amount_threshold:,.2f} (95th percentile)</p>
    <p><b>Found:</b> {len(high_value_voids)} orders with unusually high amounts</p>
    <p><b>Total Value:</b> Rs. {high_value_voids['Amount'].sum():,.2f}</p>
</div>
'''))

high_value_display = high_value_voids[avail_cols + ['Predicted_Category']].head(25).copy()
high_value_display['Amount'] = high_value_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")
display(high_value_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Order No,Outlet,Order Type,Order Date,Reason,Void By,Amount,Predicted_Category
2354,MM6241,Trincomalee,Delivery,2026-01-24 00:00:00,cx want to change the time,34652.0,"Rs. 20,521.75",cus. Change the order
148,N50066,Bambalapitiya,Dine In,2026-01-14 00:00:00,Customer wanted to change the order,6391.0,"Rs. 17,974.00",cus. Change the order
2409,U54642,Union Place,Take Away,2026-01-27 00:00:00,Customer wanted to change the order,31988.0,"Rs. 16,745.03",promotion
776,MO3899,Kadawatha,Delivery,2026-01-27 00:00:00,Cux don't Have Money for settle this order,23266.0,"Rs. 15,641.25",payment issue
1367,U52727,Kurunegala 2,Take Away,2026-01-10 00:00:00,Customer wanted to change the order,29737.0,"Rs. 15,450.03",cus. Change the order
2378,U50388,Union Place,Dine In,2026-01-04 00:00:00,Customer wanted to change the order,29082.0,"Rs. 15,447.75",cus. Change the order
256,A47373,Colombo City Center PH,Aggregator,2026-01-01 00:00:00,Due to Out Of Stock,28748.0,"Rs. 15,174.83",out of stock
1382,U53207,Kurunegala 2,Take Away,2026-01-15 00:00:00,Customer wanted to change the order,29737.0,"Rs. 14,525.02",promotion
425,U68934,Dehiwela,Dine In,2026-01-23 00:00:00,Customer want to have a Promotion,11536.0,"Rs. 14,469.50",promotion
1924,N72513,Nuwara Eliya,Dine In,2026-01-23 00:00:00,Customer denied the order,9506.0,"Rs. 13,899.75",Customer denied the order


---
## 2. Frequent Voiders (Staff with High Void Count)
Staff members who void significantly more orders than average - potential abuse pattern

In [25]:
# Flag 2: Staff who void frequently
voider_stats = pd.DataFrame()
frequent_voiders = pd.DataFrame()

if 'Void By ' in parent_df.columns:
    voider_stats = parent_df.groupby('Void By ').agg({
        order_col: 'count',
        'Amount': ['sum', 'mean', 'max'],
        'Outlet': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Multiple'
    }).reset_index()
    voider_stats.columns = ['Void By', 'Void Count', 'Total Value', 'Avg Value', 'Max Value', 'Primary Outlet']
    voider_stats = voider_stats.sort_values('Void Count', ascending=False)

    # Flag top voiders (above 1.5x average)
    avg_voids = voider_stats['Void Count'].mean()
    frequent_voiders = voider_stats[voider_stats['Void Count'] > avg_voids * 1.5].copy()

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Frequent Voiders</h3>
        <p><b>Threshold:</b> Above {avg_voids * 1.5:.0f} voids (1.5x average of {avg_voids:.1f})</p>
        <p><b>Found:</b> {len(frequent_voiders)} staff members with high void frequency</p>
    </div>
    '''))

    freq_display = frequent_voiders.copy()
    freq_display['Total Value'] = freq_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    freq_display['Avg Value'] = freq_display['Avg Value'].apply(lambda x: f"Rs. {x:,.2f}")
    freq_display['Max Value'] = freq_display['Max Value'].apply(lambda x: f"Rs. {x:,.2f}")
    display(freq_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
else:
    print("Void By column not available")

Unnamed: 0,Void By,Void Count,Total Value,Avg Value,Max Value,Primary Outlet
100,24970.0,107,"Rs. 411,226.88","Rs. 3,843.24","Rs. 13,139.73",Havelock
81,23266.0,59,"Rs. 256,239.52","Rs. 4,343.04","Rs. 15,641.25",Dehiwela
39,6873.0,38,"Rs. 132,434.84","Rs. 3,485.13","Rs. 8,836.50",Rajagiriya
188,34652.0,29,"Rs. 131,723.27","Rs. 4,542.18","Rs. 20,521.75",Athurugiriya
172,32543.0,23,"Rs. 73,869.93","Rs. 3,211.74","Rs. 7,718.50",Kotahena
61,10971.0,22,"Rs. 61,872.93","Rs. 2,812.41","Rs. 7,557.25",Ja-Ela
168,31913.0,22,"Rs. 83,660.26","Rs. 3,802.74","Rs. 8,127.00",Borella
36,6110.0,12,"Rs. 48,014.78","Rs. 4,001.23","Rs. 6,976.75",Katugasthota
167,31654.0,8,"Rs. 13,484.52","Rs. 1,685.57","Rs. 2,497.50",Kochchikade
147,29774.0,8,"Rs. 34,236.04","Rs. 4,279.50","Rs. 6,400.01",Nuwara Eliya


In [26]:
# Visualize voider distribution
if len(voider_stats) > 0:
    fig = px.bar(voider_stats.head(20), x='Void By', y='Void Count',
                 title='Top 20 Staff by Void Count',
                 color='Total Value', color_continuous_scale='Reds')
    fig.update_layout(xaxis_tickangle=-45, height=500)
    fig.add_hline(y=avg_voids * 1.5, line_dash="dash", line_color="red",
                  annotation_text=f"Threshold: {avg_voids * 1.5:.0f}")
    fig.show()

---
## 3. Voids Without Reason/Remark (No Justification)
Orders voided without any explanation - highly suspicious, no accountability

In [27]:
# Flag 3: Voids with no reason
no_reason_voids = parent_df[parent_df['Predicted_Category'] == 'order without reason/ remark'].sort_values('Amount', ascending=False).copy()

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>Voids Without Reason/Remark</h3>
    <p><b>Found:</b> {len(no_reason_voids)} orders voided without proper justification</p>
    <p><b>Total Value:</b> Rs. {no_reason_voids['Amount'].sum():,.2f}</p>
    <p>⚠️ <b>Risk:</b> No accountability for these voids</p>
</div>
'''))

no_reason_display = no_reason_voids[avail_cols].head(30).copy()
no_reason_display['Amount'] = no_reason_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")
display(no_reason_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Order No,Outlet,Order Type,Order Date,Reason,Void By,Amount
1994,MH4319,Pelawatta,Delivery,2026-01-15 00:00:00,order was changed to 89,34652.0,"Rs. 9,314.88"
825,MG6591,Kandana,Delivery,2026-01-14 00:00:00,INFROMED BY OUTLET,6873.0,"Rs. 7,998.00"
1536,LY8478,Matara,Delivery,2026-01-01 00:00:00,cxdinedorder,10971.0,"Rs. 7,557.25"
1546,MI0894,Matara,Delivery,2026-01-16 00:00:00,Order,6873.0,"Rs. 6,439.25"
1004,MK2210,Kesbewa,Delivery,2026-01-20 00:00:00,new dct 6,24970.0,"Rs. 5,934.00"
1424,LZ4446,Makola,Delivery,2026-01-02 00:00:00,order resend by 78 order,34652.0,"Rs. 5,471.75"
1554,ML2148,Matara,Take Away,2026-01-21 00:00:00,cxwantsproo,24970.0,"Rs. 5,450.00"
993,MQ7879,Kesbewa,Delivery,2026-01-30 00:00:00,Customer place a takeaway order,23266.0,"Rs. 4,837.50"
1989,MB4408,Pelawatta,Delivery,2026-01-06 00:00:00,025,34652.0,"Rs. 4,837.50"
433,MP0660,Dehiwela,Delivery,2026-01-27 00:00:00,cell centar re docket 188,23266.0,"Rs. 4,816.00"


---
## 4. Late Night Voids (10 PM - 5 AM)
Voids processed during unusual hours when supervision is typically low

In [28]:
# Flag 4: Late night voids
late_night_voids = pd.DataFrame()

if 'Void_Date_Parsed' in parent_df.columns:
    parent_df['Void_Hour'] = parent_df['Void_Date_Parsed'].dt.hour
    late_night_voids = parent_df[(parent_df['Void_Hour'] >= 22) | (parent_df['Void_Hour'] <= 5)].sort_values('Amount', ascending=False).copy()

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Late Night Voids (10 PM - 5 AM)</h3>
        <p><b>Found:</b> {len(late_night_voids)} orders voided during unusual hours</p>
        <p><b>Total Value:</b> Rs. {late_night_voids['Amount'].sum():,.2f}</p>
        <p>⚠️ <b>Risk:</b> Low supervision period, higher fraud risk</p>
    </div>
    '''))

    if len(late_night_voids) > 0:
        late_cols = avail_cols + ['Void_Hour']
        late_display = late_night_voids[[c for c in late_cols if c in late_night_voids.columns]].head(25).copy()
        display(late_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
    else:
        print("No late night voids found")
else:
    print("Void Date not available for time analysis")

Unnamed: 0,Order No,Outlet,Order Type,Order Date,Reason,Void By,Amount,Void_Hour
2409,U54642,Union Place,Take Away,2026-01-27 00:00:00,Customer wanted to change the order,31988.0,16745.0278,23
776,MO3899,Kadawatha,Delivery,2026-01-27 00:00:00,Cux don't Have Money for settle this order,23266.0,15641.2501,22
425,U68934,Dehiwela,Dine In,2026-01-23 00:00:00,Customer want to have a Promotion,11536.0,14469.5001,22
308,D73221,Dambulla,Dine In,2026-01-09 00:00:00,Customer wanted to change the order,22203.0,12136.7499,22
1854,N77080,Nittambuwa,Dine In,2026-01-21 00:00:00,Customer want to have a Promotion,2169.0,11226.12,22
768,K45189,Kadawatha,Dine In,2026-01-10 00:00:00,Customer want to have a Promotion,30234.0,10922.0,22
1130,P52822,Kolonnawa,Take Away,2026-01-28 00:00:00,Customer denied the order,7477.0,10405.0173,22
1150,MA2208,Kotahena,Delivery,2026-01-03 00:00:00,cancel custmer,37188.0,9191.2501,22
648,ML2167,Havelock,Delivery,2026-01-21 00:00:00,customer want to cancel the oder.,24970.0,9094.5001,22
881,MR0169,Katugasthota,Delivery,2026-01-30 00:00:00,"customer placed 02 offers for the order, outlet showed only one offer, therefore order no.150 placed",23266.0,8772.0001,22


---
## 5. Round Number Amounts (Possible Fake Orders)
Orders with suspiciously round amounts (divisible by 500/1000) that may indicate fabrication

In [None]:
# Flag 5: Round number amounts
def is_suspiciously_round(amount):
    if pd.isna(amount) or amount < 1000:
        return False
    return amount % 1000 == 0 or amount % 500 == 0

parent_df['Is_Round'] = parent_df['Amount'].apply(is_suspiciously_round)
round_voids = parent_df[parent_df['Is_Round'] == True].sort_values('Amount', ascending=False).copy()

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>Suspiciously Round Amounts</h3>
    <p><b>Criteria:</b> Amount >= Rs. 1,000 AND divisible by 500 or 1000</p>
    <p><b>Found:</b> {len(round_voids)} orders with perfectly round amounts</p>
    <p><b>Total Value:</b> Rs. {round_voids['Amount'].sum():,.2f}</p>
    <p>⚠️ <b>Risk:</b> Round numbers are rare in legitimate orders</p>
</div>
'''))

round_display = round_voids[avail_cols + ['Predicted_Category']].head(25).copy()
round_display['Amount'] = round_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")
display(round_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,oa,Outlet,Order Type,Order Date,Reason,Void By,Amount,Predicted_Category
1510,JT1644,Kurunegala,Delivery,2025-10-12 00:00:00,CSR ERROR,34652.0,"Rs. 16,500.00",Call Center mistake
2714,HK8920,Ragama,Delivery,2025-07-15 00:00:00,035,9856.0,"Rs. 2,500.00",order without reason/ remark
2738,HB8587,Rajagiriya,Delivery,2025-07-02 00:00:00,customer not answer the call and still customer not arrived,24970.0,"Rs. 2,500.00",phone


---
## 6. Same Phone Number with Multiple Voids
Customers with repeated voided orders - possible collusion or fake orders

In [None]:
# Flag 6: Repeat phone numbers
repeat_phone_df = pd.DataFrame()
phone_summary = pd.DataFrame()

if 'Contact no' in parent_df.columns:
    # Clean phone numbers
    parent_df['Contact_Clean'] = parent_df['Contact no'].astype(str).str.strip()

    phone_counts = parent_df['Contact_Clean'].value_counts()
    repeat_phones = phone_counts[phone_counts > 1].index.tolist()
    repeat_phone_df = parent_df[parent_df['Contact_Clean'].isin(repeat_phones)].sort_values(['Contact_Clean', 'Amount'], ascending=[True, False]).copy()

    # Summary by phone
    phone_summary = parent_df[parent_df['Contact_Clean'].isin(repeat_phones)].groupby('Contact_Clean').agg({
        order_col: 'count',
        'Amount': 'sum',
        'Outlet': lambda x: ', '.join(x.unique()[:3])
    }).reset_index()
    phone_summary.columns = ['Contact No', 'Void Count', 'Total Value', 'Outlets']
    phone_summary = phone_summary.sort_values('Void Count', ascending=False)

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Repeat Phone Numbers with Multiple Voids</h3>
        <p><b>Found:</b> {len(repeat_phones)} phone numbers with multiple voided orders</p>
        <p><b>Total Orders:</b> {len(repeat_phone_df)}</p>
        <p><b>Total Value:</b> Rs. {repeat_phone_df['Amount'].sum():,.2f}</p>
        <p><b>Risk:</b> Pattern of repeated voids from same customer</p>
    </div>
    '''))

    # Show summary first
    display(HTML("<h4>Phone Numbers with Most Voids:</h4>"))
    phone_display = phone_summary.head(20).copy()
    phone_display['Total Value'] = phone_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    display(phone_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
else:
    print("Contact number column not available")

Unnamed: 0,Contact No,Void Count,Total Value,Outlets
25,,57,"Rs. 259,313.81","Aluthgama, Balangoda, Bambalapitiya"
0,+94 117 75,28,"Rs. 68,156.81","Boralasgamuwa, Kandy, Katugasthota"
21,777777777,17,"Rs. 40,695.52","Aluthgama, Bambalapitiya, Colombo City Center PH"
4,717492104,16,"Rs. 51,256.00","Ambalangoda, Anuradhapura, Avissawella"
20,777750456,5,"Rs. 31,128.45","Nugegoda, Panadura 2, Piliyandala"
17,773929092,5,"Rs. 7,374.50","Moratuwa, Wattala, Union Place"
9,764033123,3,"Rs. 23,252.25","Dehiwela, Nawala"
1,702836538,2,"Rs. 5,866.27",Wattala
3,716363617,2,"Rs. 4,278.76",Pilimathalawa
2,711469418,2,"Rs. 3,610.81",Negambo


---
## 7. Outlet Anomalies (Statistical Outliers)
Outlets with void patterns significantly above normal - using z-score analysis

In [None]:
# Flag 7: Outlet anomalies using z-scores
outlet_stats = parent_df.groupby('Outlet').agg({
    order_col: 'count',
    'Amount': ['sum', 'mean', 'max']
}).reset_index()
outlet_stats.columns = ['Outlet', 'Void Count', 'Total Value', 'Avg Value', 'Max Value']

# Calculate z-scores
outlet_stats['Count_ZScore'] = (outlet_stats['Void Count'] - outlet_stats['Void Count'].mean()) / outlet_stats['Void Count'].std()
outlet_stats['Value_ZScore'] = (outlet_stats['Total Value'] - outlet_stats['Total Value'].mean()) / outlet_stats['Total Value'].std()

# Flag outlets with z-score > 1.5
anomaly_outlets = outlet_stats[(outlet_stats['Count_ZScore'] > 1.5) | (outlet_stats['Value_ZScore'] > 1.5)].copy()
anomaly_outlets = anomaly_outlets.sort_values('Total Value', ascending=False)

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>Outlets with Anomalous Void Patterns</h3>
    <p><b>Method:</b> Z-score analysis (>1.5 standard deviations)</p>
    <p><b>Found:</b> {len(anomaly_outlets)} outlets with unusual void patterns</p>
    <p>⚠️ <b>Risk:</b> These outlets have statistically abnormal void rates</p>
</div>
'''))

anomaly_display = anomaly_outlets.copy()
anomaly_display['Total Value'] = anomaly_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
anomaly_display['Avg Value'] = anomaly_display['Avg Value'].apply(lambda x: f"Rs. {x:,.2f}")
anomaly_display['Max Value'] = anomaly_display['Max Value'].apply(lambda x: f"Rs. {x:,.2f}")
anomaly_display['Count_ZScore'] = anomaly_display['Count_ZScore'].apply(lambda x: f"{x:.2f}")
anomaly_display['Value_ZScore'] = anomaly_display['Value_ZScore'].apply(lambda x: f"{x:.2f}")
display(anomaly_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Outlet,Void Count,Total Value,Avg Value,Max Value,Count_ZScore,Value_ZScore
107,Wattala,32,"Rs. 223,539.62","Rs. 6,985.61","Rs. 126,752.21",3.45,5.9
97,Rajagiriya,29,"Rs. 144,407.19","Rs. 4,979.56","Rs. 46,200.08",3.03,3.49
4,Anuradhapura,17,"Rs. 140,052.93","Rs. 8,238.41","Rs. 40,656.50",1.35,3.36
83,Nuwara Eliya,17,"Rs. 111,972.35","Rs. 6,586.61","Rs. 32,740.05",1.35,2.51
105,Union Place,21,"Rs. 92,009.61","Rs. 4,381.41","Rs. 14,510.02",1.91,1.9
80,Negambo,42,"Rs. 90,137.94","Rs. 2,146.14","Rs. 6,020.00",4.86,1.84
84,One Galle Face,13,"Rs. 84,259.47","Rs. 6,481.50","Rs. 48,600.08",0.78,1.67
49,Kochchikade,32,"Rs. 82,109.97","Rs. 2,565.94","Rs. 10,508.77",3.45,1.6
104,Trincomalee,21,"Rs. 81,697.30","Rs. 3,890.35","Rs. 8,772.00",1.91,1.59


In [None]:
# Visualize outlet distribution
fig = px.scatter(outlet_stats, x='Void Count', y='Total Value', text='Outlet',
                 title='Outlet Void Pattern Analysis (Red = Anomaly)',
                 color=outlet_stats['Outlet'].isin(anomaly_outlets['Outlet']),
                 color_discrete_map={True: 'red', False: 'blue'})
fig.update_traces(textposition='top center', marker=dict(size=12))
fig.update_layout(height=600, showlegend=False)
fig.show()

---
## 8. Testing Category Orders (Potential Misuse)
Orders marked as 'testing' - verify these are legitimate test orders

In [None]:
# Flag 8: Testing category
testing_voids = parent_df[parent_df['Predicted_Category'] == 'testing'].sort_values('Amount', ascending=False).copy()

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>'Testing' Category Orders</h3>
    <p><b>Found:</b> {len(testing_voids)} orders marked as testing</p>
    <p><b>Total Value:</b> Rs. {testing_voids['Amount'].sum():,.2f}</p>
    <p>⚠️ <b>Action:</b> Verify these are legitimate test orders, not abuse of the testing excuse</p>
</div>
'''))

if len(testing_voids) > 0:
    testing_display = testing_voids[avail_cols].head(25).copy()
    testing_display['Amount'] = testing_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")
    display(testing_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
else:
    print("No testing orders found")

Unnamed: 0,oa,Outlet,Order Type,Order Date,Reason,Void By,Amount
2218,W72045,Wattala,Take Away,2025-10-11 00:00:00,Product Testing,24632.0,"Rs. 126,752.21"
2564,P25693,Pilimathalawa,Take Away,2025-07-14 00:00:00,Product Testing,6559.0,"Rs. 8,400.01"
1346,O34235,Kondavil,Aggregator,2025-10-05 00:00:00,Product Testing,21697.0,"Rs. 2,290.00"
2987,HC2357,Wattala,Delivery,2025-07-03 00:00:00,TEST ORDER FROM PRESHAN IT,386.0,"Rs. 2,236.00"
2951,HP3627,Union Place,Delivery,2025-07-22 00:00:00,Test order from Preshan IT,35972.0,"Rs. 1,763.00"
2986,HC2358,Wattala,Delivery,2025-07-03 00:00:00,TEST ORDER FROM PRESHAN IT,386.0,"Rs. 1,612.50"
711,JT4845,Bambalapitiya,Delivery,2025-10-13 00:00:00,TES ODER,34652.0,Rs. 967.50
1759,JU0012,Moratuwa,Delivery,2025-10-14 00:00:00,TEST ORDER FROM IT - PRESHAN,6873.0,Rs. 881.50
2227,JY0870,Wattala,Delivery,2025-10-20 00:00:00,TEST ORDER FROM IT - PRESHAN,386.0,Rs. 881.50
2692,T27567,Puttalam,Take Away,2025-07-28 00:00:00,Product Testing,33028.0,Rs. 820.00


---
## 9. Extremely Delayed Voids (>24 Hours)
Orders voided more than 24 hours after placement - highly suspicious

In [None]:
# Flag 9: Extremely delayed voids
extreme_delay_voids = pd.DataFrame()

if 'Time_Gap_Hours' in parent_df.columns:
    extreme_delay_voids = parent_df[parent_df['Time_Gap_Hours'] > 24].sort_values('Time_Gap_Hours', ascending=False).copy()

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Extremely Delayed Voids (>24 Hours)</h3>
        <p><b>Found:</b> {len(extreme_delay_voids)} orders voided more than 24 hours after placement</p>
        <p><b>Total Value:</b> Rs. {extreme_delay_voids['Amount'].sum():,.2f}</p>
        <p>⚠️ <b>Risk:</b> Long delays suggest possible manipulation or cover-up</p>
    </div>
    '''))

    if len(extreme_delay_voids) > 0:
        ext_cols = [order_col, 'Outlet', 'Void By ', 'Time_Gap_Hours', 'Amount', 'Reason']
        ext_display = extreme_delay_voids[[c for c in ext_cols if c in extreme_delay_voids.columns]].head(25).copy()
        ext_display['Time_Gap_Hours'] = ext_display['Time_Gap_Hours'].apply(lambda x: f"{x:.1f} hours")
        ext_display['Amount'] = ext_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")
        display(ext_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
    else:
        print("No extremely delayed voids found")
else:
    print("Time gap data not available")

No extremely delayed voids found


---
## 10. Same Voider + High Value Combination
Staff members who void high-value orders - intersection of two risk factors

In [None]:
# Flag 10: Voiders with high-value voids
if 'Void By ' in parent_df.columns and len(high_value_voids) > 0:
    high_value_by_voider = high_value_voids.groupby('Void By ').agg({
        order_col: 'count',
        'Amount': ['sum', 'mean', 'max']
    }).reset_index()
    high_value_by_voider.columns = ['Void By', 'High Value Count', 'Total High Value', 'Avg High Value', 'Max Single Void']
    high_value_by_voider = high_value_by_voider.sort_values('Total High Value', ascending=False)

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Staff with Multiple High-Value Voids</h3>
        <p><b>Analysis:</b> Who voids the most high-value orders?</p>
        <p>⚠️ <b>Risk:</b> Combination of access and high amounts</p>
    </div>
    '''))

    hv_display = high_value_by_voider.head(15).copy()
    hv_display['Total High Value'] = hv_display['Total High Value'].apply(lambda x: f"Rs. {x:,.2f}")
    hv_display['Avg High Value'] = hv_display['Avg High Value'].apply(lambda x: f"Rs. {x:,.2f}")
    hv_display['Max Single Void'] = hv_display['Max Single Void'].apply(lambda x: f"Rs. {x:,.2f}")
    display(hv_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Void By,High Value Count,Total High Value,Avg High Value,Max Single Void
16,24632.0,1,"Rs. 126,752.21","Rs. 126,752.21","Rs. 126,752.21"
13,23266.0,6,"Rs. 98,186.41","Rs. 16,364.40","Rs. 40,656.50"
8,9506.0,2,"Rs. 55,616.05","Rs. 27,808.03","Rs. 32,740.05"
22,29082.0,1,"Rs. 48,600.08","Rs. 48,600.08","Rs. 48,600.08"
24,30137.0,1,"Rs. 46,200.08","Rs. 46,200.08","Rs. 46,200.08"
5,6873.0,4,"Rs. 41,527.25","Rs. 10,381.81","Rs. 11,556.25"
29,35972.0,1,"Rs. 40,656.50","Rs. 40,656.50","Rs. 40,656.50"
6,7423.0,2,"Rs. 26,541.75","Rs. 13,270.88","Rs. 15,480.00"
10,11266.0,2,"Rs. 24,690.04","Rs. 12,345.02","Rs. 14,450.02"
4,5844.0,1,"Rs. 22,525.04","Rs. 22,525.04","Rs. 22,525.04"


---
# Combined fraud risk score
Orders with multiple red flags are highest priority for investigation

In [None]:
# Create combined fraud risk score
parent_df['Fraud_Flags'] = 0
parent_df['Fraud_Reasons'] = ''

# Add flags with weights
# High value (1 point)
parent_df.loc[parent_df['Amount'] >= amount_threshold, 'Fraud_Flags'] += 1
parent_df.loc[parent_df['Amount'] >= amount_threshold, 'Fraud_Reasons'] += 'High Value; '

# No reason (2 points - more serious)
parent_df.loc[parent_df['Predicted_Category'] == 'order without reason/ remark', 'Fraud_Flags'] += 2
parent_df.loc[parent_df['Predicted_Category'] == 'order without reason/ remark', 'Fraud_Reasons'] += 'No Reason; '

# Testing (1 point)
parent_df.loc[parent_df['Predicted_Category'] == 'testing', 'Fraud_Flags'] += 1
parent_df.loc[parent_df['Predicted_Category'] == 'testing', 'Fraud_Reasons'] += 'Testing; '

# Round amount (1 point)
parent_df.loc[parent_df['Is_Round'] == True, 'Fraud_Flags'] += 1
parent_df.loc[parent_df['Is_Round'] == True, 'Fraud_Reasons'] += 'Round Amount; '

# Late night (1 point)
if 'Void_Hour' in parent_df.columns:
    parent_df.loc[(parent_df['Void_Hour'] >= 22) | (parent_df['Void_Hour'] <= 5), 'Fraud_Flags'] += 1
    parent_df.loc[(parent_df['Void_Hour'] >= 22) | (parent_df['Void_Hour'] <= 5), 'Fraud_Reasons'] += 'Late Night; '

# Extreme delay (2 points - more serious)
if 'Time_Gap_Hours' in parent_df.columns:
    parent_df.loc[parent_df['Time_Gap_Hours'] > 24, 'Fraud_Flags'] += 2
    parent_df.loc[parent_df['Time_Gap_Hours'] > 24, 'Fraud_Reasons'] += 'Extreme Delay; '

# Classify risk levels
parent_df['Risk_Level'] = pd.cut(parent_df['Fraud_Flags'], bins=[-1, 0, 1, 2, 10],
                                  labels=['Low', 'Medium', 'High', 'Critical'])

# High risk orders (2+ flags)
high_risk_orders = parent_df[parent_df['Fraud_Flags'] >= 2].sort_values(['Fraud_Flags', 'Amount'], ascending=[False, False]).copy()
critical_orders = parent_df[parent_df['Fraud_Flags'] >= 3].copy()

# Summary
risk_summary = parent_df['Risk_Level'].value_counts().reindex(['Critical', 'High', 'Medium', 'Low'])

print("")




In [None]:
# Risk Summary Display
display(HTML(f'''
<div style="padding:30px; color:white; border-radius:15px; text-align:center;">
    <h1>Fraud Risk Summary</h1>
    <table style="width:80%; margin:auto; color:white; font-size:18px; border-collapse:collapse;">
        <tr style="border-bottom:1px solid white;"><td style="padding:10px;">High-Value Voids:</td><td><b>{len(high_value_voids)}</b></td></tr>
        <tr style="border-bottom:1px solid white;"><td style="padding:10px;">Voids Without Reason:</td><td><b>{len(no_reason_voids)}</b></td></tr>
        <tr style="border-bottom:1px solid white;"><td style="padding:10px;">Late Night Voids:</td><td><b>{len(late_night_voids)}</b></td></tr>
        <tr style="border-bottom:1px solid white;"><td style="padding:10px;">Round Amount Voids:</td><td><b>{len(round_voids)}</b></td></tr>
        <tr style="border-bottom:1px solid white;"><td style="padding:10px;">Testing Category:</td><td><b>{len(testing_voids)}</b></td></tr>
        <tr style="border-bottom:1px solid white;"><td style="padding:10px;">Extreme Delays (>24hr):</td><td><b>{len(extreme_delay_voids)}</b></td></tr>
        <tr style="border-bottom:1px solid white;"><td style="padding:10px;">Frequent Voiders:</td><td><b>{len(frequent_voiders)}</b></td></tr>
        <tr style="border-bottom:1px solid white;"><td style="padding:10px;">Outlet Anomalies:</td><td><b>{len(anomaly_outlets)}</b></td></tr>
        <tr style="background:;"><td style="padding:15px;"><b>⚠️ CRITICAL RISK ORDERS (3+ flags):</b></td><td><b>{len(critical_orders)}</b></td></tr>
        <tr style="background:;"><td style="padding:15px;"><b>HIGH RISK ORDERS (2+ flags):</b></td><td><b>{len(high_risk_orders)}</b></td></tr>
    </table>
</div>
'''))

0,1
High-Value Voids:,42
Voids Without Reason:,10
Late Night Voids:,235
Round Amount Voids:,3
Testing Category:,12
Extreme Delays (>24hr):,0
Frequent Voiders:,25
Outlet Anomalies:,9
⚠️ CRITICAL RISK ORDERS (3+ flags):,1
HIGH RISK ORDERS (2+ flags):,23


In [None]:
# Show high risk orders
display(HTML("<h2>⚠️ High risk orders (Priority Investigation List)</h2>"))

if len(high_risk_orders) > 0:
    risk_cols = [order_col, 'Outlet', 'Void By ', 'Amount', 'Predicted_Category', 'Fraud_Flags', 'Fraud_Reasons', 'Risk_Level']
    risk_display = high_risk_orders[[c for c in risk_cols if c in high_risk_orders.columns]].head(50).copy()
    risk_display['Amount'] = risk_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")

    def highlight_risk(row):
        if row['Risk_Level'] == 'Critical':
            return ['background-color: #D32F2F; color: white'] * len(row)
        elif row['Risk_Level'] == 'High':
            return ['background-color: #FF5722; color: white'] * len(row)
        return [''] * len(row)

    display(risk_display.style.apply(highlight_risk, axis=1).set_table_styles([{'selector': 'th', 'props': [('background-color', '#B71C1C'), ('color', 'white')]}]))
else:
    print("No high-risk orders found")

Unnamed: 0,oa,Outlet,Void By,Amount,Predicted_Category,Fraud_Flags,Fraud_Reasons,Risk_Level
2714,HK8920,Ragama,9856.0,"Rs. 2,500.00",order without reason/ remark,3,No Reason; Round Amount;,Critical
2218,W72045,Wattala,24632.0,"Rs. 126,752.21",testing,2,High Value; Testing;,High
1510,JT1644,Kurunegala,34652.0,"Rs. 16,500.00",Call Center mistake,2,High Value; Round Amount;,High
1649,S35048,Matugama,21319.0,"Rs. 14,157.75",cus. Change the order,2,High Value; Late Night;,High
510,HE8955,Bambalapitiya,23266.0,"Rs. 13,410.62",Call Center mistake,2,High Value; Late Night;,High
673,JL8394,Athurugiriya,6873.0,"Rs. 11,556.25",Order delay,2,High Value; Late Night;,High
3023,W62409,Wattala,24716.0,"Rs. 11,427.25",promotion,2,High Value; Late Night;,High
2504,HS5346,Pelawatta,6873.0,"Rs. 11,029.50",location,2,High Value; Late Night;,High
1753,W43234,Moratuwa,4145.0,"Rs. 10,790.02",Cashier mistake,2,High Value; Late Night;,High
1061,JR5179,Kalutara,23266.0,"Rs. 10,717.75",Call Center mistake,2,High Value; Late Night;,High


In [None]:
# Risk distribution chart
fig = px.pie(values=risk_summary.values, names=risk_summary.index,
             title='Risk Level Distribution',
             color=risk_summary.index,
             color_discrete_map={'Critical': '#D32F2F', 'High': '#FF5722', 'Medium': '#FFC107', 'Low': '#4CAF50'})
fig.update_traces(textposition='inside', textinfo='percent+label+value')
fig.update_layout(height=500)
fig.show()

---
## Export the report

In [None]:
# Export comprehensive fraud detection report
output_file = 'Fraud_Detection_Report.xlsx'

with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
    workbook = writer.book

    # Sheet 1: All Orders with Risk Scoring
    parent_df.to_excel(writer, sheet_name='1_All_Orders_Risk', index=False)

    # Sheet 2: HIGH RISK ORDERS (Priority)
    high_risk_orders.to_excel(writer, sheet_name='2_HIGH_RISK', index=False)

    # Sheet 3: Critical Orders (3+ flags)
    critical_orders.to_excel(writer, sheet_name='3_CRITICAL', index=False)

    # Sheet 4: High Value Voids
    high_value_voids.to_excel(writer, sheet_name='4_High_Value', index=False)

    # Sheet 5: No Reason Voids
    no_reason_voids.to_excel(writer, sheet_name='5_No_Reason', index=False)

    # Sheet 6: Late Night Voids
    if len(late_night_voids) > 0:
        late_night_voids.to_excel(writer, sheet_name='6_Late_Night', index=False)

    # Sheet 7: Round Amount Voids
    round_voids.to_excel(writer, sheet_name='7_Round_Amounts', index=False)

    # Sheet 8: Repeat Phone Numbers
    if len(repeat_phone_df) > 0:
        repeat_phone_df.to_excel(writer, sheet_name='8_Repeat_Phones', index=False)

    # Sheet 9: Phone Summary
    if len(phone_summary) > 0:
        phone_summary.to_excel(writer, sheet_name='9_Phone_Summary', index=False)

    # Sheet 10: Voider Stats
    if len(voider_stats) > 0:
        voider_stats.to_excel(writer, sheet_name='10_Voider_Stats', index=False)

    # Sheet 11: Frequent Voiders
    if len(frequent_voiders) > 0:
        frequent_voiders.to_excel(writer, sheet_name='11_Frequent_Voiders', index=False)

    # Sheet 12: Outlet Stats
    outlet_stats.to_excel(writer, sheet_name='12_Outlet_Stats', index=False)

    # Sheet 13: Anomaly Outlets
    if len(anomaly_outlets) > 0:
        anomaly_outlets.to_excel(writer, sheet_name='13_Anomaly_Outlets', index=False)

    # Sheet 14: Testing Orders
    if len(testing_voids) > 0:
        testing_voids.to_excel(writer, sheet_name='14_Testing', index=False)

    # Sheet 15: Extreme Delays
    if len(extreme_delay_voids) > 0:
        extreme_delay_voids.to_excel(writer, sheet_name='15_Extreme_Delays', index=False)

    # Sheet 16: Summary Stats
    summary_df = pd.DataFrame({
        'Metric': [
            'Total Orders Analyzed',
            'High-Value Voids',
            'Voids Without Reason',
            'Late Night Voids',
            'Round Amount Voids',
            'Testing Category',
            'Extreme Delays (>24hr)',
            'Repeat Phone Numbers',
            'Frequent Voiders',
            'Anomaly Outlets',
            'CRITICAL RISK Orders (3+ flags)',
            'HIGH RISK Orders (2+ flags)'
        ],
        'Count': [
            len(parent_df),
            len(high_value_voids),
            len(no_reason_voids),
            len(late_night_voids),
            len(round_voids),
            len(testing_voids),
            len(extreme_delay_voids),
            len(phone_summary) if len(phone_summary) > 0 else 0,
            len(frequent_voiders),
            len(anomaly_outlets),
            len(critical_orders),
            len(high_risk_orders)
        ]
    })
    summary_df.to_excel(writer, sheet_name='16_Summary', index=False)

print(f"Fraud Detection Report exported to: {output_file}")
print(f"")
print("Sheets included:")
print("   1. All Orders with Risk Scoring")
print("   2. HIGH RISK Orders (2+ flags)")
print("   3. CRITICAL Orders (3+ flags)")
print("   4. High Value Voids")
print("   5. No Reason Voids")
print("   6. Late Night Voids")
print("   7. Round Amount Voids")
print("   8. Repeat Phone Numbers")
print("   9. Phone Summary")
print("   10. Voider Stats (all staff)")
print("   11. Frequent Voiders (flagged)")
print("   12. Outlet Stats (all outlets)")
print("   13. Anomaly Outlets (flagged)")
print("   14. Testing Orders")
print("   15. Extreme Delays")
print("   16. Summary Statistics")

# Download
files.download(output_file)

Fraud Detection Report exported to: Fraud_Detection_Report.xlsx

Sheets included:
   1. All Orders with Risk Scoring
   2. HIGH RISK Orders (2+ flags)
   3. CRITICAL Orders (3+ flags)
   4. High Value Voids
   5. No Reason Voids
   6. Late Night Voids
   7. Round Amount Voids
   8. Repeat Phone Numbers
   9. Phone Summary
   10. Voider Stats (all staff)
   11. Frequent Voiders (flagged)
   12. Outlet Stats (all outlets)
   13. Anomaly Outlets (flagged)
   14. Testing Orders
   15. Extreme Delays
   16. Summary Statistics


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

---
## Analysis Complete!

### Fraud Flags Used:
| Flag | Points | Description |
|------|--------|-------------|
| High Value | 1 | Amount above 95th percentile |
| No Reason | 2 | No justification provided |
| Testing | 1 | Marked as testing |
| Round Amount | 1 | Divisible by 500/1000 |
| Late Night | 1 | 10 PM - 5 AM |
| Extreme Delay | 2 | >24 hours after order |

### Risk Levels:
- **Critical (3+ points)**: Immediate investigation required
- **High (2 points)**: Priority review needed
- **Medium (1 point)**: Monitor and verify
- **Low (0 points)**: Standard void

### Excel Report Contains:
- All orders with risk scores
- Prioritized investigation lists
- Staff void patterns
- Outlet anomaly analysis
- Repeat customer analysis

---
## 11. Order Type Analysis (Takeaway / Dine-in / Delivery)
Breakdown of void patterns by order type - takeaway and dine-in voids are higher risk as food is consumed on-site or collected immediately

In [29]:
# Order type breakdown
order_type_stats = parent_df.groupby('Order Type').agg({
    order_col: 'count',
    'Amount': ['sum', 'mean', 'max']
}).reset_index()
order_type_stats.columns = ['Order Type', 'Void Count', 'Total Value', 'Avg Value', 'Max Value']
order_type_stats['Percentage'] = (order_type_stats['Void Count'] / order_type_stats['Void Count'].sum() * 100).round(1)
order_type_stats = order_type_stats.sort_values('Total Value', ascending=False)

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>Void Distribution by Order Type</h3>
    <p>Takeaway and Dine-in voids are higher risk - food is already prepared/consumed</p>
</div>
'''))

ot_display = order_type_stats.copy()
ot_display['Total Value'] = ot_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
ot_display['Avg Value'] = ot_display['Avg Value'].apply(lambda x: f"Rs. {x:,.2f}")
ot_display['Max Value'] = ot_display['Max Value'].apply(lambda x: f"Rs. {x:,.2f}")
ot_display['Percentage'] = ot_display['Percentage'].apply(lambda x: f"{x}%")
display(ot_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

# Visualize
fig = px.pie(order_type_stats, values='Void Count', names='Order Type',
             title='Void Distribution by Order Type',
             color_discrete_sequence=px.colors.sequential.Reds_r)
fig.update_traces(textposition='inside', textinfo='percent+label+value')
fig.show()

Unnamed: 0,Order Type,Void Count,Total Value,Avg Value,Max Value,Percentage
1,Delivery,263,"Rs. 1,062,018.03","Rs. 4,038.09","Rs. 20,521.75",36.9%
3,Take Away,246,"Rs. 889,314.51","Rs. 3,615.10","Rs. 16,745.03",34.5%
2,Dine In,153,"Rs. 728,576.19","Rs. 4,761.94","Rs. 17,974.00",21.5%
0,Aggregator,51,"Rs. 133,503.17","Rs. 2,617.71","Rs. 15,174.83",7.2%


---
## 12. Takeaway Void Analysis (High Risk)
Takeaway orders are high-risk because food is prepared and handed over - voiding after collection means free food

In [30]:
# Takeaway specific analysis
takeaway_voids = parent_df[parent_df['Order Type'].str.lower().str.contains('take', na=False)].copy()

takeaway_by_outlet = takeaway_voids.groupby('Outlet').agg({
    order_col: 'count',
    'Amount': ['sum', 'mean']
}).reset_index()
takeaway_by_outlet.columns = ['Outlet', 'Takeaway Voids', 'Total Value', 'Avg Value']
takeaway_by_outlet = takeaway_by_outlet.sort_values('Total Value', ascending=False)

# Calculate takeaway void rate per outlet
outlet_total_voids = parent_df.groupby('Outlet')[order_col].count().reset_index()
outlet_total_voids.columns = ['Outlet', 'Total Voids']
takeaway_by_outlet = takeaway_by_outlet.merge(outlet_total_voids, on='Outlet')
takeaway_by_outlet['Takeaway %'] = (takeaway_by_outlet['Takeaway Voids'] / takeaway_by_outlet['Total Voids'] * 100).round(1)

# Flag outlets with high takeaway void percentage (above 50%)
high_takeaway_outlets = takeaway_by_outlet[takeaway_by_outlet['Takeaway %'] > 50].copy()

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>Takeaway Void Analysis</h3>
    <p><b>Total Takeaway Voids:</b> {len(takeaway_voids)}</p>
    <p><b>Total Value:</b> Rs. {takeaway_voids['Amount'].sum():,.2f}</p>
    <p><b>Outlets with >50% Takeaway Voids:</b> {len(high_takeaway_outlets)}</p>
    <p>Risk: Takeaway voids after food handover = potential theft</p>
</div>
'''))

ta_display = takeaway_by_outlet.head(20).copy()
ta_display['Total Value'] = ta_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
ta_display['Avg Value'] = ta_display['Avg Value'].apply(lambda x: f"Rs. {x:,.2f}")
ta_display['Takeaway %'] = ta_display['Takeaway %'].apply(lambda x: f"{x}%")
display(ta_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Outlet,Takeaway Voids,Total Value,Avg Value,Total Voids,Takeaway %
0,Trincomalee,12,"Rs. 46,435.07","Rs. 3,869.59",22,54.5%
1,Kurunegala 2,3,"Rs. 36,180.06","Rs. 12,060.02",7,42.9%
2,Kundasale,5,"Rs. 33,000.04","Rs. 6,600.01",10,50.0%
3,Union Place,5,"Rs. 29,165.03","Rs. 5,833.01",8,62.5%
4,Nuwara Eliya,6,"Rs. 28,670.04","Rs. 4,778.34",12,50.0%
5,Aluthgama,6,"Rs. 26,060.04","Rs. 4,343.34",9,66.7%
6,Dambulla,7,"Rs. 24,076.53","Rs. 3,439.50",15,46.7%
7,Kegalle,4,"Rs. 22,425.04","Rs. 5,606.26",8,50.0%
8,Embuldeniya,3,"Rs. 19,260.03","Rs. 6,420.01",8,37.5%
9,Polonnaruwa,4,"Rs. 18,633.03","Rs. 4,658.26",5,80.0%


---
## 13. Dine-in Void Analysis (High Risk)
Dine-in voids are suspicious because the customer is present and food is served - why void after consumption?

In [31]:
# Dine-in specific analysis
dinein_voids = parent_df[parent_df['Order Type'].str.lower().str.contains('dine', na=False)].copy()

dinein_by_outlet = dinein_voids.groupby('Outlet').agg({
    order_col: 'count',
    'Amount': ['sum', 'mean']
}).reset_index()
dinein_by_outlet.columns = ['Outlet', 'Dine-in Voids', 'Total Value', 'Avg Value']
dinein_by_outlet = dinein_by_outlet.sort_values('Total Value', ascending=False)

# Calculate dine-in void rate per outlet
dinein_by_outlet = dinein_by_outlet.merge(outlet_total_voids, on='Outlet')
dinein_by_outlet['Dine-in %'] = (dinein_by_outlet['Dine-in Voids'] / dinein_by_outlet['Total Voids'] * 100).round(1)

# Flag outlets with high dine-in void percentage
high_dinein_outlets = dinein_by_outlet[dinein_by_outlet['Dine-in %'] > 40].copy()

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>Dine-in Void Analysis</h3>
    <p><b>Total Dine-in Voids:</b> {len(dinein_voids)}</p>
    <p><b>Total Value:</b> Rs. {dinein_voids['Amount'].sum():,.2f}</p>
    <p><b>Outlets with >40% Dine-in Voids:</b> {len(high_dinein_outlets)}</p>
    <p>Risk: Dine-in voids after service = customer ate without paying</p>
</div>
'''))

di_display = dinein_by_outlet.head(20).copy()
di_display['Total Value'] = di_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
di_display['Avg Value'] = di_display['Avg Value'].apply(lambda x: f"Rs. {x:,.2f}")
di_display['Dine-in %'] = di_display['Dine-in %'].apply(lambda x: f"{x}%")
display(di_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Outlet,Dine-in Voids,Total Value,Avg Value,Total Voids,Dine-in %
0,Dambulla,7,"Rs. 38,087.25","Rs. 5,441.04",15,46.7%
1,Dehiwela,8,"Rs. 37,872.25","Rs. 4,734.03",25,32.0%
2,Colombo City Center PH,9,"Rs. 34,345.18","Rs. 3,816.13",16,56.2%
3,Trincomalee,6,"Rs. 32,233.34","Rs. 5,372.22",22,27.3%
4,Katugasthota,6,"Rs. 31,637.25","Rs. 5,272.88",20,30.0%
5,Nuwara Eliya,6,"Rs. 30,809.50","Rs. 5,134.92",12,50.0%
6,Bambalapitiya,2,"Rs. 22,252.50","Rs. 11,126.25",12,16.7%
7,Union Place,2,"Rs. 19,887.50","Rs. 9,943.75",8,25.0%
8,One Galle Face,3,"Rs. 16,888.25","Rs. 5,629.42",4,75.0%
9,Nittambuwa,2,"Rs. 15,741.12","Rs. 7,870.56",4,50.0%


---
## 14. Self-Voiding Detection (Placed By = Void By)
Staff voiding their own orders - potential collusion with customers or theft

In [32]:
# Self-voiding detection
self_void_df = pd.DataFrame()

if 'Placed By' in parent_df.columns and 'Void By ' in parent_df.columns:
    # Clean names for comparison
    parent_df['Placed_Clean'] = parent_df['Placed By'].astype(str).str.strip().str.lower()
    parent_df['Voider_Clean'] = parent_df['Void By '].astype(str).str.strip().str.lower()

    # Find self-voids
    self_void_df = parent_df[parent_df['Placed_Clean'] == parent_df['Voider_Clean']].copy()
    self_void_df = self_void_df[self_void_df['Placed_Clean'] != 'nan'].copy()

    # Stats by person
    self_void_stats = self_void_df.groupby('Void By ').agg({
        order_col: 'count',
        'Amount': ['sum', 'mean', 'max']
    }).reset_index()
    self_void_stats.columns = ['Staff', 'Self-Void Count', 'Total Value', 'Avg Value', 'Max Value']
    self_void_stats = self_void_stats.sort_values('Total Value', ascending=False)

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Self-Voiding Detection</h3>
        <p><b>Found:</b> {len(self_void_df)} orders where placer = voider</p>
        <p><b>Total Value:</b> Rs. {self_void_df['Amount'].sum():,.2f}</p>
        <p><b>Staff Involved:</b> {len(self_void_stats)}</p>
        <p>Risk: Staff voiding their own orders may indicate theft or collusion</p>
    </div>
    '''))

    sv_display = self_void_stats.head(20).copy()
    sv_display['Total Value'] = sv_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    sv_display['Avg Value'] = sv_display['Avg Value'].apply(lambda x: f"Rs. {x:,.2f}")
    sv_display['Max Value'] = sv_display['Max Value'].apply(lambda x: f"Rs. {x:,.2f}")
    display(sv_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
else:
    print("Placed By column not available for self-void analysis")

Unnamed: 0,Staff,Self-Void Count,Total Value,Avg Value,Max Value


---
## 15. Quick Void Detection (Voided within 30 minutes of order)
Orders voided very quickly after placement - food may not have been prepared, potential fake order

In [33]:
# Quick void detection (within 30 minutes)
quick_void_df = pd.DataFrame()

if 'Time_Gap_Hours' in parent_df.columns:
    parent_df['Time_Gap_Minutes'] = parent_df['Time_Gap_Hours'] * 60
    quick_void_df = parent_df[(parent_df['Time_Gap_Minutes'] >= 0) & (parent_df['Time_Gap_Minutes'] <= 30)].copy()
    quick_void_df = quick_void_df.sort_values('Amount', ascending=False)

    # Stats by voider
    quick_void_by_staff = quick_void_df.groupby('Void By ').agg({
        order_col: 'count',
        'Amount': 'sum'
    }).reset_index()
    quick_void_by_staff.columns = ['Staff', 'Quick Void Count', 'Total Value']
    quick_void_by_staff = quick_void_by_staff.sort_values('Quick Void Count', ascending=False)

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Quick Voids (Within 30 Minutes)</h3>
        <p><b>Found:</b> {len(quick_void_df)} orders voided within 30 minutes of placement</p>
        <p><b>Total Value:</b> Rs. {quick_void_df['Amount'].sum():,.2f}</p>
        <p>Risk: Very quick voids may indicate pre-planned fraud or fake orders</p>
    </div>
    '''))

    qv_cols = [order_col, 'Outlet', 'Order Type', 'Void By ', 'Time_Gap_Minutes', 'Amount', 'Reason']
    qv_display = quick_void_df[[c for c in qv_cols if c in quick_void_df.columns]].head(25).copy()
    qv_display['Time_Gap_Minutes'] = qv_display['Time_Gap_Minutes'].apply(lambda x: f"{x:.0f} min")
    qv_display['Amount'] = qv_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")
    display(qv_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
else:
    print("Time gap data not available")

Unnamed: 0,Order No,Outlet,Order Type,Void By,Time_Gap_Minutes,Amount,Reason
2354,MM6241,Trincomalee,Delivery,34652.0,5 min,"Rs. 20,521.75",cx want to change the time
1367,U52727,Kurunegala 2,Take Away,29737.0,29 min,"Rs. 15,450.03",Customer wanted to change the order
256,A47373,Colombo City Center PH,Aggregator,28748.0,23 min,"Rs. 15,174.83",Due to Out Of Stock
2339,T65615,Trincomalee,Dine In,6925.0,30 min,"Rs. 13,362.25",Customer wanted to change the order
810,K37055,Kandana,Dine In,24832.0,27 min,"Rs. 12,993.53",Customer wanted to change the order
833,W89759,Kandy,Take Away,10044.0,12 min,"Rs. 12,620.02",Customer wanted to change the order
15,L41883,Aluthgama,Take Away,27519.0,12 min,"Rs. 10,630.02",Customer wanted to change the order
1130,P52822,Kolonnawa,Take Away,7477.0,10 min,"Rs. 10,405.02",Customer denied the order
1864,LZ6122,Nugegoda,Delivery,24970.0,18 min,"Rs. 9,954.50",time change
2026,MN9413,Peradeniya,Delivery,34652.0,7 min,"Rs. 9,914.19",Called in a few minutes and wanted to cancel


---
## 16. Same Day Void Pattern (Order and Void on Same Day)
Orders placed and voided on the same day - normal for legitimate issues, but high volume is suspicious

In [34]:
# Same day void analysis
same_day_voids = pd.DataFrame()

if 'Order_Date_Parsed' in parent_df.columns and 'Void_Date_Parsed' in parent_df.columns:
    parent_df['Order_Date_Only'] = parent_df['Order_Date_Parsed'].dt.date
    parent_df['Void_Date_Only'] = parent_df['Void_Date_Parsed'].dt.date

    same_day_voids = parent_df[parent_df['Order_Date_Only'] == parent_df['Void_Date_Only']].copy()
    diff_day_voids = parent_df[parent_df['Order_Date_Only'] != parent_df['Void_Date_Only']].copy()

    # Staff with most same-day voids
    same_day_by_staff = same_day_voids.groupby('Void By ').agg({
        order_col: 'count',
        'Amount': 'sum'
    }).reset_index()
    same_day_by_staff.columns = ['Staff', 'Same Day Voids', 'Total Value']
    same_day_by_staff = same_day_by_staff.sort_values('Same Day Voids', ascending=False)

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Same Day Void Analysis</h3>
        <p><b>Same Day Voids:</b> {len(same_day_voids)} ({len(same_day_voids)/len(parent_df)*100:.1f}%)</p>
        <p><b>Different Day Voids:</b> {len(diff_day_voids)} ({len(diff_day_voids)/len(parent_df)*100:.1f}%)</p>
        <p><b>Same Day Total Value:</b> Rs. {same_day_voids['Amount'].sum():,.2f}</p>
    </div>
    '''))

    display(HTML("<h4>Staff with Most Same-Day Voids:</h4>"))
    sd_display = same_day_by_staff.head(15).copy()
    sd_display['Total Value'] = sd_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    display(sd_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
else:
    print("Date columns not available for same-day analysis")

Unnamed: 0,Staff,Same Day Voids,Total Value
100,24970.0,107,"Rs. 411,226.88"
81,23266.0,59,"Rs. 256,239.52"
39,6873.0,38,"Rs. 132,434.84"
188,34652.0,29,"Rs. 131,723.27"
172,32543.0,23,"Rs. 73,869.93"
61,10971.0,22,"Rs. 61,872.93"
168,31913.0,22,"Rs. 83,660.26"
36,6110.0,12,"Rs. 48,014.78"
167,31654.0,8,"Rs. 13,484.52"
147,29774.0,8,"Rs. 34,236.04"


---
## 16b. Different Day Void Analysis (Order and Void on Different Days)
Orders voided on a different day than placement - higher risk as it suggests delayed manipulation or after-the-fact voiding

In [61]:
# Different day void analysis
diff_day_voids = pd.DataFrame()

if 'Order_Date_Only' in parent_df.columns and 'Void_Date_Only' in parent_df.columns:
    diff_day_voids = parent_df[parent_df['Order_Date_Only'] != parent_df['Void_Date_Only']].copy()
    diff_day_voids = diff_day_voids[diff_day_voids['Order_Date_Only'].notna() & diff_day_voids['Void_Date_Only'].notna()]

    # Calculate days gap
    diff_day_voids['Days_Gap'] = (pd.to_datetime(diff_day_voids['Void_Date_Only']) -
                                   pd.to_datetime(diff_day_voids['Order_Date_Only'])).dt.days

    # Fill missing staff values for groupby
    diff_day_voids['Void_By_Clean'] = diff_day_voids['Void By '].fillna('Unknown/Missing').astype(str)

    # Stats by voider (including missing)
    diff_day_by_staff = diff_day_voids.groupby('Void_By_Clean').agg({
        order_col: 'count',
        'Amount': 'sum',
        'Days_Gap': 'mean'
    }).reset_index()
    diff_day_by_staff.columns = ['Staff', 'Diff Day Voids', 'Total Value', 'Avg Days Gap']
    diff_day_by_staff = diff_day_by_staff.sort_values('Diff Day Voids', ascending=False)

    # Stats by outlet
    diff_day_by_outlet = diff_day_voids.groupby('Outlet').agg({
        order_col: 'count',
        'Amount': 'sum',
        'Days_Gap': 'mean'
    }).reset_index()
    diff_day_by_outlet.columns = ['Outlet', 'Diff Day Voids', 'Total Value', 'Avg Days Gap']
    diff_day_by_outlet = diff_day_by_outlet.sort_values('Total Value', ascending=False)

    # Breakdown by days gap
    diff_day_voids['Gap_Category'] = pd.cut(diff_day_voids['Days_Gap'],
                                             bins=[0, 1, 3, 7, 30, 1000],
                                             labels=['1 day', '2-3 days', '4-7 days', '8-30 days', '>30 days'])
    gap_breakdown = diff_day_voids['Gap_Category'].value_counts().sort_index()

    # Check for missing staff
    missing_staff_count = diff_day_voids['Void By '].isna().sum()

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Different Day Void Analysis</h3>
        <p><b>Total Different Day Voids:</b> {len(diff_day_voids)} ({len(diff_day_voids)/len(parent_df)*100:.1f}% of all voids)</p>
        <p><b>Total Value:</b> Rs. {diff_day_voids['Amount'].sum():,.2f}</p>
        <p><b>Average Days Gap:</b> {diff_day_voids['Days_Gap'].mean():.1f} days</p>
        <p><b>Max Days Gap:</b> {diff_day_voids['Days_Gap'].max():.0f} days</p>
        <p><b>Voids with Missing Staff Info:</b> {missing_staff_count}</p>
        <p>Risk: Voiding on a different day suggests delayed manipulation or after-the-fact cover-up</p>
    </div>
    '''))

    # Gap breakdown
    display(HTML("<h4>Breakdown by Days Gap:</h4>"))
    gap_df = pd.DataFrame({'Gap Category': gap_breakdown.index, 'Count': gap_breakdown.values})
    gap_df['Percentage'] = (gap_df['Count'] / gap_df['Count'].sum() * 100).round(1).astype(str) + '%'
    display(gap_df.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

    # Staff with most different day voids
    display(HTML("<h4>Staff with Most Different Day Voids:</h4>"))
    dd_staff_display = diff_day_by_staff.head(15).copy()
    dd_staff_display['Total Value'] = dd_staff_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    dd_staff_display['Avg Days Gap'] = dd_staff_display['Avg Days Gap'].apply(lambda x: f"{x:.1f} days")
    display(dd_staff_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

    # Outlets with most different day voids
    display(HTML("<h4>Outlets with Most Different Day Void Value:</h4>"))
    dd_outlet_display = diff_day_by_outlet.head(15).copy()
    dd_outlet_display['Total Value'] = dd_outlet_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    dd_outlet_display['Avg Days Gap'] = dd_outlet_display['Avg Days Gap'].apply(lambda x: f"{x:.1f} days")
    display(dd_outlet_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

    # Show all different day voids with details
    display(HTML("<h4>All Different Day Voids (Detailed):</h4>"))
    detail_cols = [order_col, 'Outlet', 'Order Type', 'Void By ', 'Order Date', 'Void Date', 'Days_Gap', 'Amount', 'Reason']
    detail_display = diff_day_voids[[c for c in detail_cols if c in diff_day_voids.columns]].copy()
    detail_display['Amount'] = detail_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")
    detail_display['Days_Gap'] = detail_display['Days_Gap'].apply(lambda x: f"{x:.0f} days")
    display(detail_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

else:
    print("Date columns not available for different-day analysis")

Unnamed: 0,Gap Category,Count,Percentage
0,1 day,2,100.0%
1,2-3 days,0,0.0%
2,4-7 days,0,0.0%
3,8-30 days,0,0.0%
4,>30 days,0,0.0%


Unnamed: 0,Staff,Diff Day Voids,Total Value,Avg Days Gap
0,31005.0,1,"Rs. 2,703.00",1.0 days
1,Unknown/Missing,1,"Rs. 2,657.40",1.0 days


Unnamed: 0,Outlet,Diff Day Voids,Total Value,Avg Days Gap
0,Kottawa,1,"Rs. 2,703.00",1.0 days
1,Wattala,1,"Rs. 2,657.40",1.0 days


Unnamed: 0,Order No,Outlet,Order Type,Void By,Order Date,Void Date,Days_Gap,Amount,Reason
1244,W71139,Kottawa,Take Away,31005.0,2026-01-03 00:00:00,2026-01-04 00:09:34,1 days,"Rs. 2,703.00",Customer denied the order
2434,MR7887,Wattala,Delivery,,2026-01-31 00:00:00,2026-02-01 09:55:09,1 days,"Rs. 2,657.40",


In [60]:
# Visualize different day voids by gap category
if len(diff_day_voids) > 0:
    # Value by gap category
    gap_value = diff_day_voids.groupby('Gap_Category').agg({
        order_col: 'count',
        'Amount': 'sum'
    }).reset_index()
    gap_value.columns = ['Gap Category', 'Count', 'Total Value']

    fig = px.bar(gap_value, x='Gap Category', y='Count',
                 title='Different Day Voids by Days Gap',
                 color='Total Value', color_continuous_scale='Reds',
                 text='Count')
    fig.update_layout(height=400)
    fig.show()

    # Show high-risk different day voids (gap > 3 days and high value)
    high_risk_diff_day = diff_day_voids[(diff_day_voids['Days_Gap'] > 3) &
                                         (diff_day_voids['Amount'] >= diff_day_voids['Amount'].quantile(0.75))].copy()
    high_risk_diff_day = high_risk_diff_day.sort_values('Amount', ascending=False)

    if len(high_risk_diff_day) > 0:
        display(HTML(f'''
        <div style="padding:10px; border-left:5px solid #B71C1C; margin:10px 0;">
            <h4>High-Risk Different Day Voids (>3 days gap + high value)</h4>
            <p><b>Found:</b> {len(high_risk_diff_day)} orders</p>
            <p><b>Total Value:</b> Rs. {high_risk_diff_day['Amount'].sum():,.2f}</p>
        </div>
        '''))

        hr_cols = [order_col, 'Outlet', 'Order Type', 'Void By ', 'Days_Gap', 'Amount', 'Reason']
        hr_display = high_risk_diff_day[[c for c in hr_cols if c in high_risk_diff_day.columns]].head(25).copy()
        hr_display['Days_Gap'] = hr_display['Days_Gap'].apply(lambda x: f"{x:.0f} days")
        hr_display['Amount'] = hr_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")
        display(hr_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#B71C1C'), ('color', 'white')]}]))

---
## 17. Weekend vs Weekday Void Patterns
Weekends often have less supervision - check for unusual patterns

In [35]:
# Weekend vs weekday analysis
if 'Void_Date_Parsed' in parent_df.columns:
    parent_df['Day_of_Week'] = parent_df['Void_Date_Parsed'].dt.day_name()
    parent_df['Is_Weekend'] = parent_df['Void_Date_Parsed'].dt.dayofweek >= 5

    day_stats = parent_df.groupby('Day_of_Week').agg({
        order_col: 'count',
        'Amount': ['sum', 'mean']
    }).reset_index()
    day_stats.columns = ['Day', 'Void Count', 'Total Value', 'Avg Value']

    # Reorder days
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    day_stats['Day'] = pd.Categorical(day_stats['Day'], categories=day_order, ordered=True)
    day_stats = day_stats.sort_values('Day')

    weekend_voids = parent_df[parent_df['Is_Weekend'] == True]
    weekday_voids = parent_df[parent_df['Is_Weekend'] == False]

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Weekend vs Weekday Analysis</h3>
        <p><b>Weekday Voids:</b> {len(weekday_voids)} (Rs. {weekday_voids['Amount'].sum():,.2f})</p>
        <p><b>Weekend Voids:</b> {len(weekend_voids)} (Rs. {weekend_voids['Amount'].sum():,.2f})</p>
        <p><b>Avg per Weekday:</b> {len(weekday_voids)/5:.0f} | <b>Avg per Weekend Day:</b> {len(weekend_voids)/2:.0f}</p>
    </div>
    '''))

    # Visualize
    fig = px.bar(day_stats, x='Day', y='Void Count',
                 title='Void Distribution by Day of Week',
                 color='Total Value', color_continuous_scale='Reds')
    fig.update_layout(height=400)
    fig.show()
else:
    print("Void Date not available for day analysis")

---
## 18. Hourly Void Pattern Analysis
Identify peak void hours - unusual patterns may indicate shift-based fraud

In [36]:
# Hourly void pattern
if 'Void_Hour' in parent_df.columns:
    hourly_stats = parent_df.groupby('Void_Hour').agg({
        order_col: 'count',
        'Amount': ['sum', 'mean']
    }).reset_index()
    hourly_stats.columns = ['Hour', 'Void Count', 'Total Value', 'Avg Value']

    # Flag unusual hours (outside normal business 10am-11pm)
    unusual_hours_df = parent_df[(parent_df['Void_Hour'] < 10) | (parent_df['Void_Hour'] >= 23)]

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Hourly Void Pattern</h3>
        <p><b>Unusual Hours Voids (before 10am or after 11pm):</b> {len(unusual_hours_df)}</p>
        <p><b>Peak Void Hour:</b> {hourly_stats.loc[hourly_stats['Void Count'].idxmax(), 'Hour']}:00</p>
    </div>
    '''))

    # Visualize
    fig = px.bar(hourly_stats, x='Hour', y='Void Count',
                 title='Void Distribution by Hour',
                 color='Total Value', color_continuous_scale='Reds')
    fig.update_layout(height=400, xaxis=dict(tickmode='linear', dtick=1))
    fig.add_vrect(x0=-0.5, x1=9.5, fillcolor="red", opacity=0.1, annotation_text="Low Supervision")
    fig.add_vrect(x0=22.5, x1=23.5, fillcolor="red", opacity=0.1)
    fig.show()
else:
    print("Void Hour not available")

---
## 19. Consecutive Void Detection
Multiple voids in sequence by same staff - possible batch voiding scheme

In [37]:
# Consecutive void detection - multiple voids within short time window by same person
consecutive_voids = []

if 'Void_Date_Parsed' in parent_df.columns and 'Void By ' in parent_df.columns:
    # Sort by voider and void time
    sorted_df = parent_df.sort_values(['Void By ', 'Void_Date_Parsed']).copy()
    sorted_df = sorted_df[sorted_df['Void_Date_Parsed'].notna()]

    # Calculate time between consecutive voids by same person
    sorted_df['Prev_Void_Time'] = sorted_df.groupby('Void By ')['Void_Date_Parsed'].shift(1)
    sorted_df['Time_Since_Prev'] = (sorted_df['Void_Date_Parsed'] - sorted_df['Prev_Void_Time']).dt.total_seconds() / 60

    # Flag voids within 10 minutes of previous void by same person
    rapid_sequence = sorted_df[sorted_df['Time_Since_Prev'] <= 10].copy()

    # Group by voider
    rapid_by_staff = rapid_sequence.groupby('Void By ').agg({
        order_col: 'count',
        'Amount': 'sum'
    }).reset_index()
    rapid_by_staff.columns = ['Staff', 'Rapid Sequence Count', 'Total Value']
    rapid_by_staff = rapid_by_staff.sort_values('Rapid Sequence Count', ascending=False)

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Consecutive Void Detection</h3>
        <p><b>Found:</b> {len(rapid_sequence)} voids within 10 minutes of another void by same person</p>
        <p><b>Total Value:</b> Rs. {rapid_sequence['Amount'].sum():,.2f}</p>
        <p>Risk: Rapid consecutive voids may indicate batch processing of fraudulent orders</p>
    </div>
    '''))

    if len(rapid_by_staff) > 0:
        rb_display = rapid_by_staff.head(15).copy()
        rb_display['Total Value'] = rb_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
        display(rb_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
else:
    print("Required columns not available")

Unnamed: 0,Staff,Rapid Sequence Count,Total Value
9,24970.0,31,"Rs. 107,116.86"
7,23266.0,12,"Rs. 45,941.74"
6,10971.0,11,"Rs. 31,256.17"
5,6873.0,8,"Rs. 19,899.50"
19,32543.0,5,"Rs. 14,157.75"
17,31654.0,3,"Rs. 3,834.51"
4,6110.0,3,"Rs. 11,780.00"
1,3791.0,2,"Rs. 3,482.51"
14,29774.0,2,"Rs. 12,405.02"
23,37188.0,2,"Rs. 7,578.50"


---
## 20. Staff-Outlet Concentration Analysis
Staff who void mostly at one outlet vs multiple - concentrated patterns are more suspicious

In [38]:
# Staff-outlet concentration analysis
staff_outlet_matrix = pd.DataFrame()

if 'Void By ' in parent_df.columns:
    staff_outlet = parent_df.groupby(['Void By ', 'Outlet']).agg({
        order_col: 'count',
        'Amount': 'sum'
    }).reset_index()
    staff_outlet.columns = ['Staff', 'Outlet', 'Void Count', 'Total Value']

    # Calculate concentration - how many outlets does each staff void at
    staff_concentration = staff_outlet.groupby('Staff').agg({
        'Outlet': 'nunique',
        'Void Count': 'sum',
        'Total Value': 'sum'
    }).reset_index()
    staff_concentration.columns = ['Staff', 'Outlets Worked', 'Total Voids', 'Total Value']

    # Find primary outlet for each staff
    primary_outlet = staff_outlet.loc[staff_outlet.groupby('Staff')['Void Count'].idxmax()][['Staff', 'Outlet', 'Void Count']]
    primary_outlet.columns = ['Staff', 'Primary Outlet', 'Primary Outlet Voids']

    staff_concentration = staff_concentration.merge(primary_outlet, on='Staff')
    staff_concentration['Concentration %'] = (staff_concentration['Primary Outlet Voids'] / staff_concentration['Total Voids'] * 100).round(1)
    staff_concentration = staff_concentration.sort_values('Total Value', ascending=False)

    # Flag high concentration (>90% at one outlet with high void count)
    high_concentration = staff_concentration[(staff_concentration['Concentration %'] > 90) & (staff_concentration['Total Voids'] > 5)]

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Staff-Outlet Concentration</h3>
        <p><b>Staff with >90% voids at single outlet (and >5 voids):</b> {len(high_concentration)}</p>
        <p>High concentration at one outlet may indicate localized fraud scheme</p>
    </div>
    '''))

    sc_display = staff_concentration.head(20).copy()
    sc_display['Total Value'] = sc_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    sc_display['Concentration %'] = sc_display['Concentration %'].apply(lambda x: f"{x}%")
    display(sc_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Staff,Outlets Worked,Total Voids,Total Value,Primary Outlet,Primary Outlet Voids,Concentration %
100,24970.0,59,107,"Rs. 411,226.88",Havelock,5,4.7%
81,23266.0,41,59,"Rs. 256,239.52",Dehiwela,3,5.1%
39,6873.0,31,38,"Rs. 132,434.84",Rajagiriya,3,7.9%
188,34652.0,23,29,"Rs. 131,723.27",Athurugiriya,2,6.9%
168,31913.0,18,22,"Rs. 83,660.26",Borella,2,9.1%
172,32543.0,19,23,"Rs. 73,869.93",Kotahena,3,13.0%
61,10971.0,20,22,"Rs. 61,872.93",Ja-Ela,3,13.6%
36,6110.0,1,12,"Rs. 48,014.78",Katugasthota,12,100.0%
145,29737.0,1,6,"Rs. 44,684.56",Kurunegala 2,6,100.0%
132,28748.0,1,7,"Rs. 36,814.15",Colombo City Center PH,7,100.0%


---
## 21. Void Category by Order Type Cross-Analysis
Which void reasons are associated with which order types - detect unusual patterns

In [39]:
# Category by order type cross-analysis
category_ordertype = parent_df.groupby(['Order Type', 'Predicted_Category']).agg({
    order_col: 'count',
    'Amount': 'sum'
}).reset_index()
category_ordertype.columns = ['Order Type', 'Category', 'Count', 'Total Value']

# Pivot for heatmap
pivot_count = category_ordertype.pivot(index='Category', columns='Order Type', values='Count').fillna(0)

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>Void Category by Order Type</h3>
    <p>Cross-reference to find unusual patterns (e.g., many "customer cancel" for dine-in is suspicious)</p>
</div>
'''))

# Heatmap
fig = px.imshow(pivot_count,
                title='Void Count: Category vs Order Type',
                color_continuous_scale='Reds',
                aspect='auto')
fig.update_layout(height=600)
fig.show()

# Suspicious combinations for takeaway/dine-in
suspicious_combos = category_ordertype[
    (category_ordertype['Order Type'].str.lower().str.contains('take|dine', na=False)) &
    (category_ordertype['Category'].isin(['order without reason/ remark', 'testing', 'other']))
]

if len(suspicious_combos) > 0:
    display(HTML("<h4>Suspicious Combinations (Takeaway/Dine-in with questionable reasons):</h4>"))
    susp_display = suspicious_combos.sort_values('Total Value', ascending=False)
    susp_display['Total Value'] = susp_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    display(susp_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Order Type,Category,Count,Total Value
52,Take Away,testing,14,"Rs. 19,277.03"
46,Take Away,order without reason/ remark,4,"Rs. 13,650.00"
37,Dine In,testing,3,"Rs. 7,183.25"
47,Take Away,other,1,"Rs. 1,295.00"


---
## 22. End of Day / Shift Voids
Voids occurring near closing time - less oversight, higher fraud risk

In [42]:
# End of day / shift voids (11pm-12am typically closing prep)
closing_time_voids = pd.DataFrame()

if 'Void_Hour' in parent_df.columns:
    closing_time_voids = parent_df[(parent_df['Void_Hour'] >= 23) & (parent_df['Void_Hour'] <= 24)].copy()

    # Stats by outlet for closing time voids
    closing_by_outlet = closing_time_voids.groupby('Outlet').agg({
        order_col: 'count',
        'Amount': 'sum'
    }).reset_index()
    closing_by_outlet.columns = ['Outlet', 'Closing Time Voids', 'Total Value']
    closing_by_outlet = closing_by_outlet.sort_values('Total Value', ascending=False)

    # Compare to total voids per outlet
    closing_by_outlet = closing_by_outlet.merge(outlet_total_voids, on='Outlet')
    closing_by_outlet['Closing %'] = (closing_by_outlet['Closing Time Voids'] / closing_by_outlet['Total Voids'] * 100).round(1)

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Closing Time Voids (11pm - 12am)</h3>
        <p><b>Found:</b> {len(closing_time_voids)} voids during closing hours</p>
        <p><b>Total Value:</b> Rs. {closing_time_voids['Amount'].sum():,.2f}</p>
        <p>Risk: Closing time has reduced supervision, easier to manipulate</p>
    </div>
    '''))

    ct_display = closing_by_outlet.head(15).copy()
    ct_display['Total Value'] = ct_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    ct_display['Closing %'] = ct_display['Closing %'].apply(lambda x: f"{x}%")
    display(ct_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Outlet,Closing Time Voids,Total Value,Total Voids,Closing %
0,Kochchikade,15,"Rs. 32,783.00",22,68.2%
1,Union Place,1,"Rs. 16,745.03",8,12.5%
2,Trincomalee,4,"Rs. 15,496.60",22,18.2%
3,Makola,3,"Rs. 15,080.25",10,30.0%
4,Rajagiriya,3,"Rs. 13,045.61",21,14.3%
5,Kotahena,2,"Rs. 12,744.12",14,14.3%
6,Jaffna,5,"Rs. 12,706.77",11,45.5%
7,Athurugiriya,3,"Rs. 9,561.25",8,37.5%
8,Godagama,2,"Rs. 9,202.00",6,33.3%
9,Negambo,6,"Rs. 8,876.26",24,25.0%


---
## 23. High-Value Takeaway and Dine-in Voids (Combined Risk)
Takeaway/Dine-in orders with high amounts - double risk factor

In [43]:
# High-value takeaway and dine-in (combined risk)
high_value_ta_di = parent_df[
    (parent_df['Amount'] >= amount_threshold) &
    (parent_df['Order Type'].str.lower().str.contains('take|dine', na=False))
].sort_values('Amount', ascending=False).copy()

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>High-Value Takeaway/Dine-in Voids (Priority)</h3>
    <p><b>Found:</b> {len(high_value_ta_di)} high-value orders in takeaway/dine-in categories</p>
    <p><b>Total Value:</b> Rs. {high_value_ta_di['Amount'].sum():,.2f}</p>
    <p>Risk: Combination of high amount + immediate consumption order type</p>
</div>
'''))

if len(high_value_ta_di) > 0:
    hvtd_cols = [order_col, 'Outlet', 'Order Type', 'Void By ', 'Amount', 'Predicted_Category', 'Reason']
    hvtd_display = high_value_ta_di[[c for c in hvtd_cols if c in high_value_ta_di.columns]].head(30).copy()
    hvtd_display['Amount'] = hvtd_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")
    display(hvtd_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Order No,Outlet,Order Type,Void By,Amount,Predicted_Category,Reason
148,N50066,Bambalapitiya,Dine In,6391.0,"Rs. 17,974.00",cus. Change the order,Customer wanted to change the order
2409,U54642,Union Place,Take Away,31988.0,"Rs. 16,745.03",promotion,Customer wanted to change the order
1367,U52727,Kurunegala 2,Take Away,29737.0,"Rs. 15,450.03",cus. Change the order,Customer wanted to change the order
2378,U50388,Union Place,Dine In,29082.0,"Rs. 15,447.75",cus. Change the order,Customer wanted to change the order
1382,U53207,Kurunegala 2,Take Away,29737.0,"Rs. 14,525.02",promotion,Customer wanted to change the order
425,U68934,Dehiwela,Dine In,11536.0,"Rs. 14,469.50",promotion,Customer want to have a Promotion
1924,N72513,Nuwara Eliya,Dine In,9506.0,"Rs. 13,899.75",Customer denied the order,Customer denied the order
2468,Y37699,Yakkala,Dine In,25810.0,"Rs. 13,577.25",double punch,Customer wanted to change the order
2339,T65615,Trincomalee,Dine In,6925.0,"Rs. 13,362.25",Cashier mistake,Customer wanted to change the order
810,K37055,Kandana,Dine In,24832.0,"Rs. 12,993.53",cus. Change the order,Customer wanted to change the order


---
## 24. Month-End Spike Detection
Check for unusual void spikes at month-end - potential inventory manipulation

In [44]:
# Daily void trend with month-end analysis
if 'Void_Date_Parsed' in parent_df.columns:
    parent_df['Void_Day'] = parent_df['Void_Date_Parsed'].dt.day

    daily_stats = parent_df.groupby(parent_df['Void_Date_Parsed'].dt.date).agg({
        order_col: 'count',
        'Amount': 'sum'
    }).reset_index()
    daily_stats.columns = ['Date', 'Void Count', 'Total Value']
    daily_stats['Date'] = pd.to_datetime(daily_stats['Date'])
    daily_stats['Day'] = daily_stats['Date'].dt.day

    # Flag month-end (last 3 days)
    daily_stats['Is_Month_End'] = daily_stats['Day'] >= 28

    month_end_voids = parent_df[parent_df['Void_Day'] >= 28]
    avg_daily = daily_stats['Void Count'].mean()
    month_end_avg = daily_stats[daily_stats['Is_Month_End']]['Void Count'].mean()

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Month-End Void Analysis</h3>
        <p><b>Average Daily Voids:</b> {avg_daily:.1f}</p>
        <p><b>Month-End Average (28th onwards):</b> {month_end_avg:.1f}</p>
        <p><b>Month-End Total Value:</b> Rs. {month_end_voids['Amount'].sum():,.2f}</p>
        <p>{'Warning: Month-end spike detected' if month_end_avg > avg_daily * 1.2 else 'No significant month-end spike'}</p>
    </div>
    '''))

    # Visualize daily trend
    fig = px.bar(daily_stats, x='Date', y='Void Count',
                 title='Daily Void Trend',
                 color='Is_Month_End',
                 color_discrete_map={True: 'red', False: 'steelblue'})
    fig.add_hline(y=avg_daily, line_dash="dash", annotation_text=f"Avg: {avg_daily:.0f}")
    fig.update_layout(height=400, showlegend=False)
    fig.show()

---
## 25. Voider Cross-Voiding Analysis
Staff voiding orders placed by specific other staff - potential collusion patterns

In [45]:
# Cross-voiding analysis - who voids whose orders
if 'Placed By' in parent_df.columns and 'Void By ' in parent_df.columns:
    # Exclude self-voids
    cross_void_df = parent_df[parent_df['Placed_Clean'] != parent_df['Voider_Clean']].copy()
    cross_void_df = cross_void_df[(cross_void_df['Placed_Clean'] != 'nan') & (cross_void_df['Voider_Clean'] != 'nan')]

    # Create pair analysis
    cross_pairs = cross_void_df.groupby(['Placed By', 'Void By ']).agg({
        order_col: 'count',
        'Amount': 'sum'
    }).reset_index()
    cross_pairs.columns = ['Placed By', 'Voided By', 'Void Count', 'Total Value']
    cross_pairs = cross_pairs.sort_values('Void Count', ascending=False)

    # Flag pairs with high frequency (>3 times same pair)
    frequent_pairs = cross_pairs[cross_pairs['Void Count'] >= 3]

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Cross-Voiding Patterns</h3>
        <p><b>Total Cross-Voids:</b> {len(cross_void_df)}</p>
        <p><b>Frequent Pairs (3+ occurrences):</b> {len(frequent_pairs)}</p>
        <p>Risk: Repeated voiding of one staff's orders by another may indicate collusion</p>
    </div>
    '''))

    if len(frequent_pairs) > 0:
        display(HTML("<h4>Staff Pairs with Frequent Cross-Voiding:</h4>"))
        fp_display = frequent_pairs.head(20).copy()
        fp_display['Total Value'] = fp_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
        display(fp_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

Unnamed: 0,Placed By,Voided By,Void Count,Total Value
313,6323,24970.0,47,"Rs. 189,434.01"
312,6323,23266.0,38,"Rs. 149,903.74"
310,6323,6873.0,15,"Rs. 46,898.07"
311,6323,10971.0,14,"Rs. 43,717.05"
315,6323,32543.0,13,"Rs. 42,512.99"
314,6323,31913.0,10,"Rs. 38,489.30"
316,6323,34652.0,9,"Rs. 28,285.62"
427,PICKME,31654.0,6,"Rs. 9,832.52"
6,009112,29774.0,5,"Rs. 21,401.02"
416,PICKME,3791.0,5,"Rs. 15,121.27"


---
## 26. Cashier Mistake Category Deep Dive
Excessive "cashier mistake" claims by specific staff - easy excuse for fraud

In [46]:
# Cashier mistake analysis - who claims this most
cashier_mistake_df = parent_df[parent_df['Predicted_Category'] == 'Cashier mistake'].copy()

if len(cashier_mistake_df) > 0:
    cashier_by_staff = cashier_mistake_df.groupby('Void By ').agg({
        order_col: 'count',
        'Amount': ['sum', 'mean']
    }).reset_index()
    cashier_by_staff.columns = ['Staff', 'Mistake Count', 'Total Value', 'Avg Value']
    cashier_by_staff = cashier_by_staff.sort_values('Mistake Count', ascending=False)

    # Compare to their total voids
    staff_totals = parent_df.groupby('Void By ')[order_col].count().reset_index()
    staff_totals.columns = ['Staff', 'Total Voids']
    cashier_by_staff = cashier_by_staff.merge(staff_totals, on='Staff')
    cashier_by_staff['Mistake %'] = (cashier_by_staff['Mistake Count'] / cashier_by_staff['Total Voids'] * 100).round(1)

    # Flag staff with high cashier mistake percentage
    high_mistake_staff = cashier_by_staff[(cashier_by_staff['Mistake %'] > 30) & (cashier_by_staff['Mistake Count'] >= 3)]

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Cashier Mistake Pattern Analysis</h3>
        <p><b>Total "Cashier Mistake" Voids:</b> {len(cashier_mistake_df)}</p>
        <p><b>Total Value:</b> Rs. {cashier_mistake_df['Amount'].sum():,.2f}</p>
        <p><b>Staff with >30% "mistake" rate (3+ voids):</b> {len(high_mistake_staff)}</p>
        <p>Risk: "Cashier mistake" is an easy excuse - high rates are suspicious</p>
    </div>
    '''))

    cm_display = cashier_by_staff.head(15).copy()
    cm_display['Total Value'] = cm_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    cm_display['Avg Value'] = cm_display['Avg Value'].apply(lambda x: f"Rs. {x:,.2f}")
    cm_display['Mistake %'] = cm_display['Mistake %'].apply(lambda x: f"{x}%")
    display(cm_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
else:
    print("No cashier mistake category found")

Unnamed: 0,Staff,Mistake Count,Total Value,Avg Value,Total Voids,Mistake %
0,6925.0,3,"Rs. 19,817.26","Rs. 6,605.75",4,75.0%
1,24970.0,3,"Rs. 13,764.75","Rs. 4,588.25",107,2.8%
2,11159.0,2,"Rs. 9,306.25","Rs. 4,653.13",2,100.0%
3,31139.0,2,"Rs. 11,255.25","Rs. 5,627.63",3,66.7%
4,26421.0,2,"Rs. 8,856.26","Rs. 4,428.13",5,40.0%
5,6110.0,2,"Rs. 11,233.75","Rs. 5,616.88",12,16.7%
6,31595.0,2,"Rs. 10,799.76","Rs. 5,399.88",3,66.7%
7,3101.0,1,"Rs. 2,397.25","Rs. 2,397.25",1,100.0%
8,6873.0,1,Rs. 825.00,Rs. 825.00,38,2.6%
9,6654.0,1,"Rs. 5,045.01","Rs. 5,045.01",4,25.0%


---
## 27. Double Punch Analysis
Orders marked as "double punch" - verify these are legitimate duplicates

In [52]:
# Double punch analysis
double_punch_df = parent_df[parent_df['Predicted_Category'] == 'double punch'].copy()

if len(double_punch_df) > 0:
    dp_by_outlet = double_punch_df.groupby('Outlet').agg({
        order_col: 'count',
        'Amount': 'sum'
    }).reset_index()
    dp_by_outlet.columns = ['Outlet', 'Double Punch Count', 'Total Value']
    dp_by_outlet = dp_by_outlet.sort_values('Double Punch Count', ascending=False)

    dp_by_staff = double_punch_df.groupby('Void By ').agg({
        order_col: 'count',
        'Amount': 'sum'
    }).reset_index()
    dp_by_staff.columns = ['Staff', 'Double Punch Count', 'Total Value']
    dp_by_staff = dp_by_staff.sort_values('Double Punch Count', ascending=False)

    display(HTML(f'''
    <div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
        <h3>Double Punch Analysis</h3>
        <p><b>Total "Double Punch" Voids:</b> {len(double_punch_df)}</p>
        <p><b>Total Value:</b> Rs. {double_punch_df['Amount'].sum():,.2f}</p>
        <p>Action: Cross-check with actual duplicate orders in POS system</p>
    </div>
    '''))

    display(HTML("<h4>By Outlet:</h4>"))
    dp_out_display = dp_by_outlet.head(10).copy()
    dp_out_display['Total Value'] = dp_out_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    display(dp_out_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))

    display(HTML("<h4>By Staff:</h4>"))
    dp_staff_display = dp_by_staff.head(10).copy()
    dp_staff_display['Total Value'] = dp_staff_display['Total Value'].apply(lambda x: f"Rs. {x:,.2f}")
    display(dp_staff_display.style.set_table_styles([{'selector': 'th', 'props': [('background-color', '#D32F2F'), ('color', 'white')]}]))
else:
    print("No double punch category found")

Unnamed: 0,Outlet,Double Punch Count,Total Value
4,Kotahena,2,"Rs. 10,889.75"
1,Dambulla,1,"Rs. 4,095.00"
0,Bambalapitiya,1,"Rs. 4,945.00"
2,Havelock,1,"Rs. 13,136.50"
3,Kilinochchi,1,"Rs. 3,203.50"
5,Kottawa,1,"Rs. 3,332.50"
6,Mount Lavinia,1,"Rs. 3,929.12"
7,Piliyandala,1,"Rs. 2,270.00"
8,Ragama,1,"Rs. 6,031.29"
9,Tangalle,1,"Rs. 6,321.00"


Unnamed: 0,Staff,Double Punch Count,Total Value
1,24970.0,5,"Rs. 22,920.00"
0,23266.0,4,"Rs. 27,085.16"
3,32543.0,2,"Rs. 8,148.50"
2,25810.0,1,"Rs. 13,577.25"


---
## 28. Extended Fraud Risk Score (Including New Flags)
Updated risk scoring with additional detection criteria

In [53]:
# Extended fraud risk scoring with all new flags
# Initialize if base fraud flags don't exist yet
if 'Fraud_Flags' not in parent_df.columns:
    parent_df['Fraud_Flags'] = 0
    parent_df['Fraud_Reasons'] = ''

    # Recreate base flags
    parent_df.loc[parent_df['Amount'] >= amount_threshold, 'Fraud_Flags'] += 1
    parent_df.loc[parent_df['Amount'] >= amount_threshold, 'Fraud_Reasons'] += 'High Value; '

    parent_df.loc[parent_df['Predicted_Category'] == 'order without reason/ remark', 'Fraud_Flags'] += 2
    parent_df.loc[parent_df['Predicted_Category'] == 'order without reason/ remark', 'Fraud_Reasons'] += 'No Reason; '

    parent_df.loc[parent_df['Predicted_Category'] == 'testing', 'Fraud_Flags'] += 1
    parent_df.loc[parent_df['Predicted_Category'] == 'testing', 'Fraud_Reasons'] += 'Testing; '

    if 'Is_Round' in parent_df.columns:
        parent_df.loc[parent_df['Is_Round'] == True, 'Fraud_Flags'] += 1
        parent_df.loc[parent_df['Is_Round'] == True, 'Fraud_Reasons'] += 'Round Amount; '

    if 'Void_Hour' in parent_df.columns:
        parent_df.loc[(parent_df['Void_Hour'] >= 22) | (parent_df['Void_Hour'] <= 5), 'Fraud_Flags'] += 1
        parent_df.loc[(parent_df['Void_Hour'] >= 22) | (parent_df['Void_Hour'] <= 5), 'Fraud_Reasons'] += 'Late Night; '

    if 'Time_Gap_Hours' in parent_df.columns:
        parent_df.loc[parent_df['Time_Gap_Hours'] > 24, 'Fraud_Flags'] += 2
        parent_df.loc[parent_df['Time_Gap_Hours'] > 24, 'Fraud_Reasons'] += 'Extreme Delay; '

parent_df['Extended_Fraud_Flags'] = parent_df['Fraud_Flags'].copy()
parent_df['Extended_Fraud_Reasons'] = parent_df['Fraud_Reasons'].copy()

# Add new flags

# Takeaway order type (1 point)
parent_df.loc[parent_df['Order Type'].str.lower().str.contains('take', na=False), 'Extended_Fraud_Flags'] += 1
parent_df.loc[parent_df['Order Type'].str.lower().str.contains('take', na=False), 'Extended_Fraud_Reasons'] += 'Takeaway; '

# Dine-in order type (1 point)
parent_df.loc[parent_df['Order Type'].str.lower().str.contains('dine', na=False), 'Extended_Fraud_Flags'] += 1
parent_df.loc[parent_df['Order Type'].str.lower().str.contains('dine', na=False), 'Extended_Fraud_Reasons'] += 'Dine-in; '

# Self-void (2 points)
if 'Placed_Clean' in parent_df.columns and 'Voider_Clean' in parent_df.columns:
    self_void_mask = (parent_df['Placed_Clean'] == parent_df['Voider_Clean']) & (parent_df['Placed_Clean'] != 'nan')
    parent_df.loc[self_void_mask, 'Extended_Fraud_Flags'] += 2
    parent_df.loc[self_void_mask, 'Extended_Fraud_Reasons'] += 'Self-Void; '

# Quick void within 30 min (1 point)
if 'Time_Gap_Minutes' in parent_df.columns:
    quick_mask = (parent_df['Time_Gap_Minutes'] >= 0) & (parent_df['Time_Gap_Minutes'] <= 30)
    parent_df.loc[quick_mask, 'Extended_Fraud_Flags'] += 1
    parent_df.loc[quick_mask, 'Extended_Fraud_Reasons'] += 'Quick Void; '

# Closing time void (1 point)
if 'Void_Hour' in parent_df.columns:
    closing_mask = (parent_df['Void_Hour'] >= 21) & (parent_df['Void_Hour'] <= 23)
    parent_df.loc[closing_mask, 'Extended_Fraud_Flags'] += 1
    parent_df.loc[closing_mask, 'Extended_Fraud_Reasons'] += 'Closing Time; '

# New risk levels
parent_df['Extended_Risk_Level'] = pd.cut(parent_df['Extended_Fraud_Flags'],
                                           bins=[-1, 1, 3, 5, 20],
                                           labels=['Low', 'Medium', 'High', 'Critical'])

# High risk with extended scoring
extended_high_risk = parent_df[parent_df['Extended_Fraud_Flags'] >= 4].sort_values(
    ['Extended_Fraud_Flags', 'Amount'], ascending=[False, False]).copy()
extended_critical = parent_df[parent_df['Extended_Fraud_Flags'] >= 6].copy()

display(HTML(f'''
<div style="padding:15px; border-left:5px solid #D32F2F; margin:10px 0;">
    <h3>Extended Fraud Risk Summary</h3>
    <p><b>Critical Risk (6+ flags):</b> {len(extended_critical)}</p>
    <p><b>High Risk (4+ flags):</b> {len(extended_high_risk)}</p>
    <p><b>Total Value at Risk:</b> Rs. {extended_high_risk['Amount'].sum():,.2f}</p>
</div>
'''))

# Show extended risk distribution
ext_risk_summary = parent_df['Extended_Risk_Level'].value_counts().reindex(['Critical', 'High', 'Medium', 'Low'])
print("Extended Risk Distribution:")
print(ext_risk_summary.to_string())

Extended Risk Distribution:
Extended_Risk_Level
Critical      0
High         33
Medium      404
Low         276


In [54]:
# Display extended high risk orders
display(HTML("<h3>Extended High Risk Orders (Priority Investigation)</h3>"))

if len(extended_high_risk) > 0:
    ext_cols = [order_col, 'Outlet', 'Order Type', 'Void By ', 'Amount', 'Extended_Fraud_Flags',
                'Extended_Fraud_Reasons', 'Extended_Risk_Level']
    ext_display = extended_high_risk[[c for c in ext_cols if c in extended_high_risk.columns]].head(50).copy()
    ext_display['Amount'] = ext_display['Amount'].apply(lambda x: f"Rs. {x:,.2f}")

    def highlight_ext_risk(row):
        if row['Extended_Risk_Level'] == 'Critical':
            return ['background-color: #B71C1C; color: white'] * len(row)
        elif row['Extended_Risk_Level'] == 'High':
            return ['background-color: #D32F2F; color: white'] * len(row)
        return [''] * len(row)

    display(ext_display.style.apply(highlight_ext_risk, axis=1).set_table_styles(
        [{'selector': 'th', 'props': [('background-color', '#B71C1C'), ('color', 'white')]}]))
else:
    print("No extended high-risk orders found")

Unnamed: 0,Order No,Outlet,Order Type,Void By,Amount,Extended_Fraud_Flags,Extended_Fraud_Reasons,Extended_Risk_Level
1130,P52822,Kolonnawa,Take Away,7477.0,"Rs. 10,405.02",5,High Value; Late Night; Takeaway; Quick Void; Closing Time;,High
1554,ML2148,Matara,Take Away,24970.0,"Rs. 5,450.00",5,No Reason; Late Night; Takeaway; Closing Time;,High
2409,U54642,Union Place,Take Away,31988.0,"Rs. 16,745.03",4,High Value; Late Night; Takeaway; Closing Time;,High
425,U68934,Dehiwela,Dine In,11536.0,"Rs. 14,469.50",4,High Value; Late Night; Dine-in; Closing Time;,High
308,D73221,Dambulla,Dine In,22203.0,"Rs. 12,136.75",4,High Value; Late Night; Dine-in; Closing Time;,High
1854,N77080,Nittambuwa,Dine In,2169.0,"Rs. 11,226.12",4,High Value; Late Night; Dine-in; Closing Time;,High
768,K45189,Kadawatha,Dine In,30234.0,"Rs. 10,922.00",4,High Value; Late Night; Dine-in; Closing Time;,High
1994,MH4319,Pelawatta,Delivery,34652.0,"Rs. 9,314.88",4,High Value; No Reason; Closing Time;,High
1536,LY8478,Matara,Delivery,10971.0,"Rs. 7,557.25",4,No Reason; Late Night; Closing Time;,High
863,K55123,Karapitiya,Take Away,32263.0,"Rs. 5,745.01",4,Late Night; Takeaway; Quick Void; Closing Time;,High


---
## 29. Export Extended Fraud Report
Complete report with all new analysis sheets

In [56]:
# Export extended fraud detection report
extended_output_file = 'Extended_Fraud_Detection_Report.xlsx'

with pd.ExcelWriter(extended_output_file, engine='xlsxwriter') as writer:

    # Sheet 1: All Orders with Extended Risk Scoring
    parent_df.to_excel(writer, sheet_name='1_All_Orders_Extended', index=False)

    # Sheet 2: Extended Critical Risk
    extended_critical.to_excel(writer, sheet_name='2_CRITICAL_Extended', index=False)

    # Sheet 3: Extended High Risk
    extended_high_risk.to_excel(writer, sheet_name='3_HIGH_RISK_Extended', index=False)

    # Sheet 4: High Value Takeaway/Dine-in
    high_value_ta_di.to_excel(writer, sheet_name='4_HighValue_TA_DI', index=False)

    # Sheet 5: Self-Voids
    if len(self_void_df) > 0:
        self_void_df.to_excel(writer, sheet_name='5_Self_Voids', index=False)

    # Sheet 6: Quick Voids
    if len(quick_void_df) > 0:
        quick_void_df.to_excel(writer, sheet_name='6_Quick_Voids', index=False)

    # Sheet 7: Takeaway Voids
    takeaway_voids.to_excel(writer, sheet_name='7_Takeaway_Voids', index=False)

    # Sheet 8: Dine-in Voids
    dinein_voids.to_excel(writer, sheet_name='8_Dinein_Voids', index=False)

    # Sheet 9: Closing Time Voids
    if len(closing_time_voids) > 0:
        closing_time_voids.to_excel(writer, sheet_name='9_Closing_Time', index=False)

    # Sheet 10: Cashier Mistakes
    if len(cashier_mistake_df) > 0:
        cashier_mistake_df.to_excel(writer, sheet_name='10_Cashier_Mistakes', index=False)

    # Sheet 11: Double Punch
    if len(double_punch_df) > 0:
        double_punch_df.to_excel(writer, sheet_name='11_Double_Punch', index=False)

    # Sheet 12: Staff Concentration
    if len(staff_concentration) > 0:
        staff_concentration.to_excel(writer, sheet_name='12_Staff_Concentration', index=False)

    # Sheet 13: Cross-Voiding Pairs
    if 'cross_pairs' in dir() and len(cross_pairs) > 0:
        cross_pairs.to_excel(writer, sheet_name='13_Cross_Void_Pairs', index=False)

    # Sheet 14: Order Type Stats
    order_type_stats.to_excel(writer, sheet_name='14_Order_Type_Stats', index=False)

    # Sheet 15: Extended Summary
    extended_summary = pd.DataFrame({
        'Metric': [
            'Total Orders Analyzed',
            'Takeaway Voids',
            'Dine-in Voids',
            'Delivery Voids',
            'High-Value Takeaway/Dine-in',
            'Self-Voids (Placed By = Void By)',
            'Quick Voids (within 30 min)',
            'Closing Time Voids (9-11pm)',
            'Cashier Mistake Claims',
            'Double Punch Claims',
            'Same Day Voids',
            'EXTENDED CRITICAL (6+ flags)',
            'EXTENDED HIGH RISK (4+ flags)'
        ],
        'Count': [
            len(parent_df),
            len(takeaway_voids),
            len(dinein_voids),
            len(parent_df[parent_df['Order Type'].str.lower().str.contains('deliv', na=False)]),
            len(high_value_ta_di),
            len(self_void_df) if len(self_void_df) > 0 else 0,
            len(quick_void_df) if len(quick_void_df) > 0 else 0,
            len(closing_time_voids) if len(closing_time_voids) > 0 else 0,
            len(cashier_mistake_df) if len(cashier_mistake_df) > 0 else 0,
            len(double_punch_df) if len(double_punch_df) > 0 else 0,
            len(same_day_voids) if len(same_day_voids) > 0 else 0,
            len(extended_critical),
            len(extended_high_risk)
        ],
        'Total Value (Rs.)': [
            parent_df['Amount'].sum(),
            takeaway_voids['Amount'].sum(),
            dinein_voids['Amount'].sum(),
            parent_df[parent_df['Order Type'].str.lower().str.contains('deliv', na=False)]['Amount'].sum(),
            high_value_ta_di['Amount'].sum(),
            self_void_df['Amount'].sum() if len(self_void_df) > 0 else 0,
            quick_void_df['Amount'].sum() if len(quick_void_df) > 0 else 0,
            closing_time_voids['Amount'].sum() if len(closing_time_voids) > 0 else 0,
            cashier_mistake_df['Amount'].sum() if len(cashier_mistake_df) > 0 else 0,
            double_punch_df['Amount'].sum() if len(double_punch_df) > 0 else 0,
            same_day_voids['Amount'].sum() if len(same_day_voids) > 0 else 0,
            extended_critical['Amount'].sum(),
            extended_high_risk['Amount'].sum()
        ]
    })
    extended_summary.to_excel(writer, sheet_name='15_Extended_Summary', index=False)

print(f"Extended Fraud Report exported to: {extended_output_file}")
print("")
print("New sheets included:")
print("   1. All Orders with Extended Risk Scoring")
print("   2. Critical Risk (6+ flags)")
print("   3. High Risk (4+ flags)")
print("   4. High-Value Takeaway/Dine-in")
print("   5. Self-Voids")
print("   6. Quick Voids")
print("   7. Takeaway Voids")
print("   8. Dine-in Voids")
print("   9. Closing Time Voids")
print("   10. Cashier Mistakes")
print("   11. Double Punch")
print("   12. Staff Concentration")
print("   13. Cross-Void Pairs")
print("   14. Order Type Stats")
print("   15. Extended Summary")

Extended Fraud Report exported to: Extended_Fraud_Detection_Report.xlsx

New sheets included:
   1. All Orders with Extended Risk Scoring
   2. Critical Risk (6+ flags)
   3. High Risk (4+ flags)
   4. High-Value Takeaway/Dine-in
   5. Self-Voids
   6. Quick Voids
   7. Takeaway Voids
   8. Dine-in Voids
   9. Closing Time Voids
   10. Cashier Mistakes
   11. Double Punch
   12. Staff Concentration
   13. Cross-Void Pairs
   14. Order Type Stats
   15. Extended Summary


---
## Extended Analysis Complete

### New Fraud Flags Added:
| Flag | Points | Description |
|------|--------|-------------|
| Takeaway Order | 1 | Takeaway orders are higher risk |
| Dine-in Order | 1 | Dine-in orders are higher risk |
| Self-Void | 2 | Staff voiding their own orders |
| Quick Void | 1 | Voided within 30 minutes |
| Closing Time | 1 | Voided between 9pm-11pm |

### Extended Risk Levels:
- **Critical (6+ points)**: Immediate investigation required
- **High (4-5 points)**: Priority review needed
- **Medium (2-3 points)**: Monitor and verify
- **Low (0-1 points)**: Standard void

### Key Detection Patterns:
1. **Takeaway/Dine-in Focus**: These order types are higher risk as food is already prepared/consumed
2. **Self-Voiding**: Staff voiding their own orders indicates potential collusion
3. **Quick Voids**: Orders voided within 30 minutes may be pre-planned
4. **Consecutive Voids**: Multiple voids in rapid succession by same person
5. **Cross-Voiding**: Patterns of specific staff voiding each other's orders
6. **Closing Time**: Reduced supervision during closing hours
7. **Staff Concentration**: High void activity concentrated at one outlet
8. **Cashier Mistake Abuse**: Excessive use of "cashier mistake" as reason