In [124]:
import os
import dash
import math
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
import dash_bootstrap_components as dbc

from datetime import datetime, timedelta
from plotly.subplots import make_subplots
from dash import dcc, callback, html, Input, Output, dash_table

In [126]:
# Load the transaction dataset used by every dashboard cell below
# (file name suggests a class-balanced, sorted extract — TODO confirm provenance).
df = pd.read_csv('eda_fraud_balanced_sorted.csv')

# Show the frame dimensions, then preview the first rows as the cell output.
print(df.shape)
df.head()

(15012, 16)


Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,gender,city,state,lat,long,city_pop,job,unix_time,merch_lat,merch_long,is_fraud
0,2019-01-01 07:11:55,377026671291680,fraud_Conroy-Cruickshank,gas_transport,71.06,F,Redford,MO,37.3272,-91.0243,241,Investment analyst,1325401915,36.447073,-91.941927,0
1,2019-01-01 10:10:27,2706977570537524,fraud_Huels-Nolan,gas_transport,70.17,M,Zaleski,OH,39.283,-82.3977,341,Accounting technician,1325412627,38.340454,-81.412293,0
2,2019-01-01 10:45:15,30044330818990,fraud_DuBuque LLC,grocery_pos,176.63,F,Fort Myers,FL,26.4722,-81.8122,224256,Paramedic,1325414715,26.258936,-81.972659,0
3,2019-01-01 11:26:05,376656886990758,fraud_Berge LLC,gas_transport,68.32,M,Zavalla,TX,31.1569,-94.3871,2836,"Designer, jewellery",1325417165,31.680455,-94.08424,0
4,2019-01-01 12:26:57,30487648872433,fraud_Bednar Inc,travel,5.09,F,Central,IN,38.097,-86.1723,350,Counsellor,1325420817,37.653705,-86.315351,0


## Transaction Amount Distribution

In [128]:
# Add the natural log of the transaction amount as a new column on the shared frame.
# NOTE(review): np.log returns -inf for amt == 0 — confirm the data has no zero amounts.
df['log_amt'] = np.log(df['amt'])

# 50-bin histogram of the log amounts, coloured by the is_fraud flag (0 -> blue, 1 -> orange).
fig = px.histogram(df, x='log_amt', color='is_fraud',
                   title='Log-Scaled Transaction Amount Distribution',
                   nbins=50, opacity=0.8,
                   labels={'log_amt': 'Log(Transaction Amount)'},
                  color_discrete_map={0: 'blue', 1: 'orange'})

# Minimal Dash app: a heading plus the static figure built above (no callbacks).
app = dash.Dash(__name__)
app.layout = html.Div([
    html.H1("Transaction Amount Distribution"),
    dcc.Graph(id='graph-amount', figure=fig)
])

# Start the development server; no port is given here, unlike the later apps in this notebook.
if __name__ == '__main__':
    app.run(debug=True)

## Fraud Detection Dashboard: Merchant Category Insights

In [131]:
# New Dash app for the merchant-category dashboard, styled with the Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Per-category table of transaction counts split by is_fraud (columns 0 and 1),
# plus the fraud share in percent. Despite its name, max_fraud_rate is the whole table.
max_fraud_rate = df.groupby(['category', 'is_fraud']).size().unstack().fillna(0)
max_fraud_rate['fraud_rate'] = max_fraud_rate[1] / (max_fraud_rate[0] + max_fraud_rate[1]) * 100
# Upper bound for the slider: highest category fraud rate rounded up to a multiple of 5.
max_value = math.ceil(max_fraud_rate['fraud_rate'].max() / 5) * 5

# Page layout: header, filter controls, chart card, statistics card, explanatory text.
app.layout = dbc.Container([
    # Header
    dbc.Row([
        dbc.Col([
            html.H1("Fraud Analysis by Merchant Category", 
                   className="text-center mb-4 text-primary",
                   style={'fontWeight': 'bold'})
        ])
    ]),
    
    # Filter controls: minimum-fraud-rate slider and count/percentage toggle.
    # Both feed the update_chart callback through their ids.
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H4("Interactive Filters", className="mb-0 text-info")
                ]),
                dbc.CardBody([
                    dbc.Row([
                        dbc.Col([
                            html.Label("Filter by minimum fraud rate (%):", 
                                     className="fw-bold mb-2"),
                            # Slider from 0 to max_value in 0.5 steps, with a tick mark every 5.
                            dcc.Slider(
                                id='fraud-filter',
                                min=0,
                                max=max_value,
                                step=0.5,
                                value=0,
                                marks={i: str(i) for i in range(0, int(max_value) + 1, 5)},
                                tooltip={"placement": "bottom", "always_visible": True}
                            )
                        ], md=8),
                        dbc.Col([
                            html.Label("Chart Type:", className="fw-bold mb-2"),
                            dbc.RadioItems(
                                id='chart-type',
                                options=[
                                    {'label': 'Absolute Count', 'value': 'count'},
                                    {'label': 'Percentage', 'value': 'percent'}
                                ],
                                value='count',
                                inline=True
                            )
                        ], md=4)
                    ])
                ])
            ], className="shadow-sm")
        ])
    ], className="mb-4"),
    
    # Chart card; the figure is supplied by the callback (no static figure here).
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H4("Category Analysis Chart", className="mb-0 text-info")
                ]),
                dbc.CardBody([
                    dcc.Graph(id='fraud-chart')
                ])
            ], className="shadow-sm")
        ])
    ], className="mb-4"),
    
    # Statistics card; its children are supplied by the callback.
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H4("Detailed Statistics", className="mb-0 text-info")
                ]),
                dbc.CardBody([
                    html.Div(id='stats-table')
                ])
            ], className="shadow-sm")
        ])
    ], className="mb-4"),
    
    # Static explanatory text (not derived from the data).
    dbc.Row([
        dbc.Col([
            html.Hr(className="my-4"),
            dbc.Alert([
                html.H5("Analysis Description", className="alert-heading"),
                html.P([
                    "This interactive analysis examines fraud patterns across different merchant categories. ",
                    "Use the slider to filter categories by minimum fraud rate and toggle between absolute counts and percentages."
                ]),
                html.Hr(),
                html.H6("Key Insights:", className="fw-bold text-danger"),
                html.P([
                    "This analysis reveals which merchant categories are most vulnerable to fraudulent activities. ",
                    "The interactive filters allow you to focus on high-risk categories and understand both the volume and rate of fraud."
                ]),
                html.P([
                    html.Strong("Business Applications: "),
                    html.Br(),
                    "• Identify high-risk merchant categories for enhanced monitoring",
                    html.Br(),
                    "• Implement category-specific fraud prevention strategies",
                    html.Br(),
                    "• Optimize resource allocation based on fraud concentration",
                    html.Br(),
                    "• Develop targeted risk assessment models for different business types"
                ], className="mb-2"),
                html.P([
                    html.Strong("Strategic Value: "), 
                    "Understanding fraud distribution by merchant category enables proactive risk management, ",
                    "helping businesses implement preventive measures before fraud patterns escalate. ",
                    "This data-driven approach can significantly reduce financial losses and improve customer trust."
                ], className="mb-0 text-muted")
            ], color="light", className="border")
        ])
    ])
    
], fluid=True, className="py-4")

def _fraud_rate_color(rate):
    """Return the text colour used in the statistics table for a fraud rate in percent."""
    if rate > 10:
        return '#e74c3c'
    if rate > 5:
        return '#f39c12'
    return '#27ae60'


# Registered on this cell's `app` (not the module-level `dash.callback`): the notebook
# creates several Dash apps in one kernel, and the later day-of-week app also uses
# `@app.callback`, so the binding is explicit and consistent.
@app.callback(
    [Output('fraud-chart', 'figure'),
     Output('stats-table', 'children')],
    [Input('fraud-filter', 'value'),
     Input('chart-type', 'value')]
)
def update_chart(min_fraud_rate, chart_type):
    """Rebuild the category chart and statistics table for the current filter settings.

    Args:
        min_fraud_rate: Slider value; categories whose fraud rate (in percent) is
            below it are dropped.
        chart_type: 'count' for a bar chart of fraud / non-fraud counts; any other
            value produces the fraud-rate bar chart.

    Returns:
        A ``(figure, table)`` tuple: the Plotly figure for ``fraud-chart`` and either
        a ``dbc.Table`` or a warning ``dbc.Alert`` (when no category passes the
        filter) for ``stats-table``.
    """
    # Count transactions per category and class. The reindex guarantees both class
    # columns exist even if one class is absent from the data.
    fraud_stats = (
        df.groupby(['category', 'is_fraud']).size()
        .unstack()
        .reindex(columns=[0, 1])
        .fillna(0)
    )
    fraud_stats['fraud_rate'] = fraud_stats[1] / (fraud_stats[0] + fraud_stats[1]) * 100
    filtered_stats = (
        fraud_stats[fraud_stats['fraud_rate'] >= min_fraud_rate]
        .rename(columns={0: 'Not Fraud', 1: 'Fraud'})
    )

    if chart_type == 'count':
        fig = px.bar(
            filtered_stats.reset_index(),
            x='category',
            y=['Not Fraud', 'Fraud'],
            title=f"Fraud Distribution by Category (≥{min_fraud_rate}% fraud rate)",
            labels={'value': 'Transaction Count', 'variable': 'Type'},
            color_discrete_map={'Not Fraud': '#3498db', 'Fraud': 'orange'}
        )
    else:
        fig = px.bar(
            filtered_stats.reset_index(),
            x='category',
            y='fraud_rate',
            title=f"Fraud Rate by Category (≥{min_fraud_rate}%)",
            labels={'fraud_rate': 'Fraud Rate (%)'},
            color='fraud_rate',
            color_continuous_scale='Reds'
        )

    fig.update_layout(
        xaxis_tickangle=-45,
        hovermode='x unified',
        height=600,
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)'
    )

    if filtered_stats.empty:
        stats_table = dbc.Alert("No categories match the selected criteria.", color="warning")
        return fig, stats_table

    header = html.Thead([
        html.Tr([
            html.Th("Category", className="text-center"),
            html.Th("Total Transactions", className="text-center"),
            html.Th("Fraudulent", className="text-center"),
            html.Th("Fraud Rate (%)", className="text-center")
        ])
    ])

    # Columns are read by name, so the table stays correct if the column order changes.
    body_rows = []
    for cat, row in filtered_stats.iterrows():
        fraud_count = int(row['Fraud'])
        total_count = int(row['Not Fraud'] + row['Fraud'])
        rate = row['fraud_rate']
        body_rows.append(html.Tr([
            html.Td(cat, className="fw-bold"),
            html.Td(f"{total_count:,}", className="text-center"),
            html.Td(f"{fraud_count:,}", className="text-center text-danger fw-bold"),
            html.Td(f"{rate:.2f}%",
                    className="text-center fw-bold",
                    style={'color': _fraud_rate_color(rate)})
        ]))

    stats_table = dbc.Table(
        [header, html.Tbody(body_rows)],
        striped=True, bordered=True, hover=True, responsive=True, className="mt-3"
    )

    return fig, stats_table

# Start the development server for this dashboard on port 8051; the later apps in
# this notebook are started on other ports (8052-8054).
if __name__ == '__main__':
    app.run(debug=True, port=8051)

## Real-Time Fraud Monitoring Dashboard

In [133]:
# Parse the timestamp column in place and derive the hour of day (0-23) for each transaction.
df['trans_date_trans_time'] = pd.to_datetime(df['trans_date_trans_time'])
df['hour'] = df['trans_date_trans_time'].dt.hour

# Grouped histogram of transactions per hour, one bar series per is_fraud value.
# The `labels` mapping only renames data columns; the histogram's generated "count"
# axis is not a column, so its title is set explicitly in update_layout below.
fig = px.histogram(
    df,
    x='hour',
    color='is_fraud',
    barmode='group',
    title='<b>Hourly Transaction Analysis</b><br><sup>Normal vs Fraudulent Activity Patterns</sup>',
    labels={
        'hour': 'Hour of Day (24h format)',
        'is_fraud': 'Transaction Type'
    },
    opacity=0.85,
    color_discrete_map={0: '#1f77b4', 1: '#ff7f0e'},
    template='plotly_white'
)

# Replace the raw 0/1 legend entries with readable names, matching the naming used by
# create_fraud_histogram later in the notebook.
fig.for_each_trace(lambda t: t.update(name='Normal' if t.name == '0' else 'Fraud'))

fig.update_layout(
    hovermode='x unified',
    yaxis_title='Transaction Count',
    legend_title_text='',
    legend=dict(orientation='h', yanchor='bottom', y=1.02)
)

# New Dash app for the hourly view; it shows the static `fig` built above and has no callbacks.
app = dash.Dash(__name__)

app.layout = html.Div([
    # Page title.
    html.Div(
        className='app-header',
        children=[
            html.H1('Real-Time Fraud Monitoring Dashboard', 
                   style={'textAlign': 'center', 'color': '#2c3e50'})
        ]
    ),
    
    # One-line description under the title.
    html.Div(
        className='app-description',
        children=[
            html.P('Explore temporal patterns in transaction fraud risk', 
                  style={'textAlign': 'center', 'fontSize': 16})
        ]
    ),
    
    dcc.Graph(
        id='hourly-analysis',
        figure=fig,
        config={'displayModeBar': True}
    ),

    # NOTE(review): this insight is hard-coded text, not computed from the data —
    # re-check it whenever the dataset changes.
    html.P(
        "📌 Fraud spikes around 10 PM and 11 PM suggest increased suspicious activity late in the day. "
        "This may reflect an attempt to exploit reduced monitoring during off-peak hours or operational handovers.",
        style={
            'textAlign': 'center',
            'fontStyle': 'italic',
            'marginTop': '10px',
            'color': '#555'
        }
    ),
    
    # Footer. The date is evaluated once, when this layout is built, via pd.Timestamp.now();
    # it is not read from the CSV.
    # NOTE(review): the "(UTC)" label is static text — confirm the timestamps' time zone.
    html.Div(
        className='app-footer',
        children=[
            html.P('Data updated: ' + pd.Timestamp.now().strftime('%Y-%m-%d')),
            html.P('Filter range: 00:00 - 23:59 (UTC)')
        ]
    )
])

# Start the development server for this dashboard on port 8052.
if __name__ == '__main__':
    app.run(debug=True, port=8052)

## Hourly Fraud Statistics

In [135]:
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Hour of day for every transaction, derived from the parsed timestamp.
df['transaction_date'] = pd.to_datetime(df['trans_date_trans_time'])
df['transaction_hour'] = df['transaction_date'].dt.hour

# One row per hour: transaction count, fraud count and fraud share in percent.
hourly_stats = (
    df.groupby('transaction_hour')['is_fraud']
    .agg(['count', 'sum'])
    .reset_index()
)
hourly_stats.columns = ['Hour', 'Transactions', 'Frauds']
hourly_stats['Rate (%)'] = (
    hourly_stats['Frauds'].div(hourly_stats['Transactions']).mul(100).round(2)
)

# Table figure listing the four hourly_stats columns as-is.
table_fig = go.Figure(data=[go.Table(
    header={
        'values': list(hourly_stats.columns),
        'fill_color': 'paleturquoise',
        'align': 'center',
        'font': {'size': 12},
    },
    cells={
        'values': [hourly_stats[column] for column in hourly_stats.columns],
        'fill_color': 'lavender',
        'align': 'center',
        'font': {'size': 11},
    },
)])
table_fig.update_layout(title="HOUR STATISTICS", height=600)

# Two stacked panels: transaction volume on top, fraud rate below.
fig = make_subplots(
    rows=2, cols=1,
    subplot_titles=('Number of Transactions by Hour', 'Fraud Rate by Hour'),
    specs=[[{"secondary_y": False}], [{"secondary_y": False}]],
    vertical_spacing=0.12
)

# Top panel: normal and fraudulent counts (stacked via barmode below).
fig.add_traces(
    [
        go.Bar(
            x=hourly_stats['Hour'],
            y=hourly_stats['Transactions'] - hourly_stats['Frauds'],
            name='Normal Transactions',
            marker_color='blue'
        ),
        go.Bar(
            x=hourly_stats['Hour'],
            y=hourly_stats['Frauds'],
            name='Frauds',
            marker_color='orange'
        ),
    ],
    rows=1, cols=1
)

# Bottom panel: fraud rate as a line with markers.
fig.add_trace(
    go.Scatter(
        x=hourly_stats['Hour'],
        y=hourly_stats['Rate (%)'],
        mode='lines+markers',
        name='Fraud Rate (%)',
        line={'color': 'orange', 'width': 3},
        marker={'size': 8}
    ),
    row=2, col=1
)

fig.update_layout(
    title='Fraud Analysis by Hour of Day',
    height=800,
    showlegend=True,
    barmode='stack'
)

# Both panels share the same x-axis title; the y-axis titles differ per row.
fig.update_xaxes(title_text="Hour")
fig.update_yaxes(title_text="Number of Transactions", row=1, col=1)
fig.update_yaxes(title_text="Fraud Rate (%)", row=2, col=1)

# Dash application layout: title, statistics table card, charts card, written findings.
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col([
            html.H1("Fraud Analysis by Hour", 
                   className="text-center mb-4 text-primary",
                   style={'fontWeight': 'bold'})
        ])
    ]),
    
    # Card showing the hourly statistics table figure (table_fig).
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H3("Hourly Statistics", className="mb-0 text-info")
                ]),
                dbc.CardBody([
                    dcc.Graph(
                        id='hourly-stats-table',
                        figure=table_fig
                    )
                ])
            ], className="shadow-sm")
        ])
    ], className="mb-4"),
    
    # Card showing the two-panel volume / fraud-rate figure (fig).
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H3("Analysis Charts", className="mb-0 text-info")
                ]),
                dbc.CardBody([
                    dcc.Graph(
                        id='fraud-analysis-chart',
                        figure=fig
                    )
                ])
            ], className="shadow-sm")
        ])
    ]),
    
    # Written description and findings.
    # NOTE(review): the hours, counts and percentages listed below are hard-coded
    # strings, not read from hourly_stats — re-verify them if the data changes.
    dbc.Row([
        dbc.Col([
            html.Hr(className="my-4"),
            dbc.Alert([
                html.H5("Analysis Description", className="alert-heading"),
                html.P([
                    "This analysis shows the distribution of transactions and frauds by hour of day. ",
                    "The top chart shows the total number of transactions (normal in blue, frauds in orange). ",
                    "The bottom chart shows the fraud rate percentage by hour."
                ]),
                html.Hr(),
                html.H6("Key Findings:", className="fw-bold text-danger"),
                html.P([
                    "We observe that the hours with the highest fraud percentages are during late night and early morning hours: ",
                    "10PM, 11PM, 12AM, 1AM, 2AM, and 3 AM. These critical time periods show significantly elevated fraud activity:"
                ]),
                html.Ul([
                    html.Li([html.Strong("10:00 PM:"), " 1,931 fraudulent transactions (85.29%)"]),
                    html.Li([html.Strong("11:00 PM:"), " 1,904 fraudulent transactions (85.57%)"]),
                    html.Li([html.Strong("12:00 AM:"), " 635 fraudulent transactions (72.49%)"]),
                    html.Li([html.Strong("1:00 AM:"), " 658 fraudulent transactions (73.19%)"]),
                    html.Li([html.Strong("2:00 AM:"), " 625 fraudulent transactions (71.27%)"]),
                    html.Li([html.Strong("3:00 AM:"), " 609 fraudulent transactions (71.56%)"])
                ], className="mb-2"),
                html.P([
                    html.Strong("Business Impact: "), 
                    "These findings suggest implementing enhanced security measures and monitoring during these high-risk hours (10PM-3AM) ",
                    "could significantly reduce fraud exposure and protect both customers and business operations."
                ], className="mb-0 text-muted")
            ], color="light", className="border")
        ])
    ], className="mt-4")
    
], fluid=True, className="py-4")

# Start the development server for this dashboard on port 8053.
if __name__ == '__main__':
    app.run(debug=True, port=8053)

## Interactive Fraud Trends by Day of the Week

In [137]:
# Weekday names in calendar order; pandas' dayofweek numbers Monday as 0.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_names = dict(enumerate(day_order))

# Label every transaction with the name of its weekday.
df['transaction_date'] = pd.to_datetime(df['trans_date_trans_time'])
df['day_of_week'] = df['transaction_date'].dt.dayofweek.map(day_names)

# Per-weekday totals: transaction count, fraud count and fraud share (as a fraction),
# laid out Monday..Sunday with the weekday as a regular column.
day_stats = (
    df.groupby('day_of_week')['is_fraud']
    .agg(['count', 'sum', 'mean'])
    .round(4)
)
day_stats.columns = ['total_transactions', 'fraud_count', 'fraud_rate']
day_stats = day_stats.reindex(day_order).reset_index()

# New Dash app for the day-of-week dashboard, styled with the Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Layout: title, KPI cards, display options, main chart + statistics, time-series chart.
app.layout = dbc.Container([
    
    # Title and subtitle.
    dbc.Row([
        dbc.Col([
            html.H1("🔍 Fraud Analysis by Day of the Week", 
                   className="text-center mb-4 text-primary"),
            html.P("Interactive dashboard to analyze fraud patterns by day",
                  className="text-center text-muted mb-4")
        ])
    ]),
    
    # KPI cards. Their values are computed once from df / day_stats when the layout
    # is built; they are not outputs of the callback.
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4(f"{len(df):,}", className="text-primary mb-0"),
                    html.P("Total Transactions", className="text-muted")
                ])
            ], className="text-center")
        ], width=3),
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4(f"{df['is_fraud'].sum():,}", className="text-danger mb-0"),
                    html.P("Fraudulent Transactions", className="text-muted")
                ])
            ], className="text-center")
        ], width=3),
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4(f"{df['is_fraud'].mean()*100:.2f}%", className="text-warning mb-0"),
                    html.P("Global Fraud Rate", className="text-muted")
                ])
            ], className="text-center")
        ], width=3),
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    # fraud_rate in day_stats is a fraction, hence the * 100.
                    html.H4(f"{day_stats['fraud_rate'].max()*100:.2f}%", className="text-info mb-0"),
                    html.P("Max Rate per Day", className="text-muted")
                ])
            ], className="text-center")
        ], width=3)
    ], className="mb-4"),
    
    # Display options; both dropdowns are inputs of the update_charts callback.
    # The ids 'chart-type' and 'stats-table' are also used by the merchant-category
    # app earlier in this notebook, which is a separate Dash instance.
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H5("Display Options", className="card-title"),
                    dbc.Row([
                        dbc.Col([
                            html.Label("Chart type:", className="form-label"),
                            dcc.Dropdown(
                                id='chart-type',
                                options=[
                                    {'label': '📊 Grouped Histogram', 'value': 'histogram'},
                                    {'label': '📈 Fraud Rate', 'value': 'rate'},
                                    {'label': '🔄 Comparison', 'value': 'comparison'}
                                ],
                                value='histogram',
                                clearable=False
                            )
                        ], width=6),
                        dbc.Col([
                            html.Label("Color palette:", className="form-label"),
                            dcc.Dropdown(
                                id='color-scheme',
                                options=[
                                    {'label': '🔵 Blue-Orange', 'value': 'blue_orange'},
                                    {'label': '🔴 Red-Green', 'value': 'red_green'},
                                    {'label': '🟣 Viridis', 'value': 'viridis'},
                                    {'label': '🌈 Plotly', 'value': 'plotly'}
                                ],
                                value='blue_orange',
                                clearable=False
                            )
                        ], width=6)
                    ])
                ])
            ])
        ])
    ], className="mb-4"),
    
    # Main chart (left) and per-day statistics (right); both are filled by the callback.
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    dcc.Graph(id='main-chart', style={'height': '500px'})
                ])
            ])
        ], width=8),
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H5("📊 Detailed Statistics", className="card-title mb-3"),
                    html.Div(id='stats-table')
                ])
            ])
        ], width=4)
    ], className="mb-4"),
    
    # Weekly fraud-rate trend; the figure is filled by the callback.
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H5("📈 Time Evolution", className="card-title"),
                    dcc.Graph(id='time-series-chart', style={'height': '400px'})
                ])
            ])
        ])
    ])
], fluid=True)

@app.callback(
    [Output('main-chart', 'figure'),
     Output('stats-table', 'children'),
     Output('time-series-chart', 'figure')],
    [Input('chart-type', 'value'),
     Input('color-scheme', 'value')]
)
def update_charts(chart_type, color_scheme):
    """Rebuild the three dynamic parts of the day-of-week dashboard.

    Args:
        chart_type: 'histogram' (grouped counts per class), 'rate' (fraud rate per
            day); any other value draws the stacked normal-vs-fraud comparison.
        color_scheme: Key into the palette table below; selects the colours for
            non-fraud (0) and fraud (1).

    Returns:
        ``(main_figure, stats_cards, time_series_figure)``: the main chart, a list
        of one ``dbc.Card`` per weekday, and the weekly fraud-rate line chart.

    Raises:
        KeyError: if ``color_scheme`` is not one of the known palette keys.
    """
    color_maps = {
        'blue_orange': {0: '#1f77b4', 1: '#ff7f0e'},
        'red_green': {0: '#2ca02c', 1: '#d62728'},
        'viridis': {0: '#440154', 1: '#fde725'},
        'plotly': {0: '#636efa', 1: '#ef553b'}
    }
    colors = color_maps[color_scheme]

    # Layout fragments shared by the three main-chart variants.
    base_layout = dict(
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font=dict(size=12),
        title_font_size=16
    )
    top_legend = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)

    # Index the per-day statistics by weekday once, instead of filtering the frame with
    # a boolean mask for every day. Days without data become zero counts.
    stats_by_day = day_stats.set_index('day_of_week').reindex(day_order)
    day_counts = stats_by_day[['total_transactions', 'fraud_count']].fillna(0).astype(int)

    if chart_type == 'histogram':
        fig_main = px.histogram(
            df, x='day_of_week', color='is_fraud',
            category_orders={'day_of_week': day_order},
            title='Transaction Distribution by Day of the Week',
            labels={'day_of_week': 'Day of the Week'},
            barmode='group',
            color_discrete_map=colors
        )
        # Readable legend entries instead of the raw 0 / 1 class values.
        fig_main.for_each_trace(
            lambda t: t.update(name='Normal' if t.name == '0' else 'Fraud')
        )
        fig_main.update_layout(
            # The histogram's generated "count" axis is not a data column, so `labels`
            # cannot rename it; set the axis title explicitly.
            yaxis_title='Number of Transactions',
            legend_title_text="Transaction Type",
            legend=top_legend,
            **base_layout
        )
        fig_main.update_traces(opacity=0.8)

    elif chart_type == 'rate':
        fig_main = px.bar(
            day_stats, x='day_of_week', y='fraud_rate',
            title='Fraud Rate by Day of the Week',
            labels={'day_of_week': 'Day of the Week', 'fraud_rate': 'Fraud Rate'},
            color='fraud_rate',
            color_continuous_scale='Reds'
        )
        fig_main.update_layout(
            xaxis_categoryorder='array',
            xaxis_categoryarray=day_order,
            **base_layout
        )

    else:
        normal_counts = day_counts['total_transactions'] - day_counts['fraud_count']

        fig_main = go.Figure()
        fig_main.add_trace(go.Bar(
            name='Normal Transactions',
            x=day_order,
            y=normal_counts.tolist(),
            marker_color=colors[0],
            opacity=0.8
        ))
        fig_main.add_trace(go.Bar(
            name='Fraudulent Transactions',
            x=day_order,
            y=day_counts['fraud_count'].tolist(),
            marker_color=colors[1],
            opacity=0.8
        ))
        fig_main.update_layout(
            title='Transaction Comparison by Day',
            xaxis_title='Day of the Week',
            yaxis_title='Number of Transactions',
            barmode='stack',
            legend=top_legend,
            **base_layout
        )

    # One summary card per weekday, in calendar order.
    stats_cards = []
    for day in day_order:
        total = int(day_counts.at[day, 'total_transactions'])
        frauds = int(day_counts.at[day, 'fraud_count'])
        rate = stats_by_day.at[day, 'fraud_rate']
        rate_text = f"{rate*100:.2f}%" if pd.notna(rate) else "n/a"
        stats_cards.append(dbc.Card([
            dbc.CardBody([
                html.H6(day, className="card-title text-primary"),
                html.P([
                    html.Strong(f"{total:,}"), " total transactions"
                ], className="card-text mb-1"),
                html.P([
                    html.Strong(f"{frauds:,}", className="text-danger"), " frauds"
                ], className="card-text mb-1"),
                html.P([
                    html.Strong(rate_text, className="text-warning"), " rate"
                ], className="card-text mb-0")
            ])
        ], className="mb-2"))

    # Weekly fraud rate per weekday. Grouping by a derived Series avoids copying df.
    week_of = df['transaction_date'].dt.to_period('W').rename('week')
    weekly_fraud = df.groupby([week_of, 'day_of_week'])['is_fraud'].mean().reset_index()

    # Keep the four most recent weeks by week value. Taking a fixed number of trailing
    # rows would span more than four weeks whenever a week lacks some weekday.
    recent_weeks = weekly_fraud['week'].drop_duplicates().sort_values().tail(4)
    weekly_recent = weekly_fraud[weekly_fraud['week'].isin(recent_weeks)].copy()
    weekly_recent['week_str'] = weekly_recent['week'].astype(str)

    fig_time = px.line(
        weekly_recent,
        x='week_str', y='is_fraud', color='day_of_week',
        title='Fraud Rate Evolution (Last 4 weeks)',
        labels={'week_str': 'Week', 'is_fraud': 'Fraud Rate', 'day_of_week': 'Day of Week'},
        category_orders={
            'day_of_week': day_order,
            # Rows are already in week order, so unique() yields chronological labels.
            'week_str': weekly_recent['week_str'].unique().tolist()
        }
    )
    fig_time.update_layout(
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font=dict(size=12),
        title_font_size=14,
        legend_title_text="Day of Week"
    )
    fig_time.update_traces(line=dict(width=2), marker=dict(size=6))

    return fig_main, stats_cards, fig_time

# Start the development server for the day-of-week dashboard on port 8054.
if __name__ == '__main__':
    app.run(debug=True, port=8054)

## Fraud Detection Dashboard - Day of Week Analysis

In [139]:
# Placeholder global; nothing in this part of the cell assigns a frame to it.
filtered_df = None

# Weekday names in calendar order; pandas' dayofweek numbers Monday as 0.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_names = dict(enumerate(day_order))

# Label every transaction with the name of its weekday.
df['transaction_date'] = pd.to_datetime(df['trans_date_trans_time'])
df['day_of_week'] = df['transaction_date'].dt.dayofweek.map(day_names)

# Per-weekday totals: transaction and fraud counts, mean and summed amount.
daily_stats = (
    df.groupby('day_of_week')
    .agg({'is_fraud': ['count', 'sum'], 'amt': ['mean', 'sum']})
    .reset_index()
)
daily_stats.columns = ['Day', 'Total_Transactions', 'Total_Frauds', 'Avg_Amount', 'Total_Amount']
daily_stats['Fraud_Rate'] = (
    daily_stats['Total_Frauds'].div(daily_stats['Total_Transactions']).mul(100).round(2)
)

# Sort Monday..Sunday by turning the day name into an ordered categorical.
daily_stats = daily_stats.assign(
    Day=pd.Categorical(daily_stats['Day'], categories=day_order, ordered=True)
).sort_values('Day')

def create_fraud_histogram(data_df):
    """Build a grouped histogram of transactions per weekday, split by fraud flag.

    Args:
        data_df: Transactions frame with a ``day_of_week`` column (weekday names)
            and an ``is_fraud`` column (0 / 1).

    Returns:
        A Plotly figure with one bar series per ``is_fraud`` value, days ordered
        according to the module-level ``day_order``.
    """
    fig = px.histogram(data_df, x='day_of_week', color='is_fraud',
                       category_orders={'day_of_week': day_order},
                       title='Fraud Occurrence by Day of the Week',
                       labels={'day_of_week': 'Day of the Week'},
                       barmode='group',
                       opacity=0.8,
                       height=500,
                       color_discrete_map={0: 'lightblue', 1: 'orange'})

    # Trace names are the stringified is_fraud values; show readable names instead.
    fig.for_each_trace(lambda t: t.update(name='Normal' if t.name == '0' else 'Fraud'))

    fig.update_layout(
        legend_title_text='Transaction Type',
        # The histogram's generated "count" axis is not a data column, so the `labels`
        # mapping cannot rename it; set the axis title explicitly.
        yaxis_title='Number of Transactions'
    )

    return fig

def create_fraud_rate_chart(data_df):
    """Build a line chart of the fraud rate (in percent) per weekday.

    Only the columns the chart needs are aggregated, so the function works on any
    frame that has ``day_of_week`` and ``is_fraud``.

    Args:
        data_df: Transactions frame with a ``day_of_week`` column (weekday names)
            and an ``is_fraud`` column (0 / 1).

    Returns:
        A Plotly line figure with the weekdays present in ``data_df`` on the x axis,
        sorted according to the module-level ``day_order``, and the fraud rate
        rounded to two decimals on the y axis.
    """
    rate_by_day = (
        data_df.groupby('day_of_week')['is_fraud']
        .agg(['count', 'sum'])
        .reset_index()
    )
    rate_by_day.columns = ['Day', 'Total_Transactions', 'Total_Frauds']
    rate_by_day['Fraud_Rate'] = (
        rate_by_day['Total_Frauds'] / rate_by_day['Total_Transactions'] * 100
    ).round(2)

    # An ordered categorical makes the sort follow the calendar, not the alphabet.
    rate_by_day['Day'] = pd.Categorical(rate_by_day['Day'], categories=day_order, ordered=True)
    rate_by_day = rate_by_day.sort_values('Day')

    fig = px.line(rate_by_day, x='Day', y='Fraud_Rate',
                  title='Fraud Rate by Day of Week',
                  markers=True,
                  line_shape='spline',
                  height=400)
    fig.update_traces(line_color='orange', marker_size=8)
    fig.update_layout(yaxis_title='Fraud Rate (%)')
    return fig

def create_amount_analysis(data_df):
    """Build a grouped bar chart of the mean transaction amount per weekday and class.

    Args:
        data_df: Transactions frame with ``day_of_week``, ``is_fraud`` (0 / 1) and
            ``amt`` columns.

    Returns:
        A Plotly figure with up to two bar series (fraud and normal averages). A
        series is omitted when ``data_df`` has no rows of that class.
    """
    fraud_amounts = data_df[data_df['is_fraud'] == 1].groupby('day_of_week')['amt'].mean().reset_index()
    normal_amounts = data_df[data_df['is_fraud'] == 0].groupby('day_of_week')['amt'].mean().reset_index()

    fig = go.Figure()

    if not fraud_amounts.empty:
        fig.add_trace(go.Bar(x=fraud_amounts['day_of_week'], y=fraud_amounts['amt'],
                             name='Fraud Avg Amount', marker_color='#27ae60', opacity=0.7))
    if not normal_amounts.empty:
        fig.add_trace(go.Bar(x=normal_amounts['day_of_week'], y=normal_amounts['amt'],
                             name='Normal Avg Amount', marker_color='lightblue', opacity=0.7))

    fig.update_layout(title='Average Transaction Amount by Day',
                      xaxis_title='Day of Week',
                      yaxis_title='Average Amount ($)',
                      barmode='group',
                      height=400)
    # groupby returns the days alphabetically; pin the axis to calendar order so this
    # chart matches the other day-of-week charts.
    fig.update_xaxes(categoryorder='array', categoryarray=day_order)
    return fig

def create_heatmap(data_df):
    """Build a weekday-by-hour heatmap of fraud counts.

    Args:
        data_df: DataFrame providing ``transaction_date`` (datetime-like, the
            ``.dt`` accessor is used), ``day_of_week`` and ``is_fraud``.

    Returns:
        A Plotly Express ``imshow`` figure with weekdays (in ``day_order``) on
        the y-axis and the hours 0-23 on the x-axis. Cells without any
        matching rows are shown as 0.
    """
    fraud_by_slot = (
        # assign() returns a new frame, so the caller's data is left untouched.
        data_df.assign(hour=data_df['transaction_date'].dt.hour)
        .groupby(['day_of_week', 'hour'])['is_fraud']
        .sum()
        .unstack('hour')
        # Force the full 7 x 24 grid: a weekday or hour with no rows would
        # otherwise be missing, leaving gaps / uneven cells on the axes.
        .reindex(index=day_order, columns=list(range(24)))
        .fillna(0)
    )

    fig = px.imshow(fraud_by_slot,
                    title='Fraud Heatmap: Day vs Hour',
                    labels=dict(x="Hour", y="Day", color="Fraud Count"),
                    aspect="auto",
                    color_continuous_scale='Blues',
                    height=400)
    return fig

# Dash application for the day-of-week dashboard.
app = dash.Dash(__name__)

app.layout = html.Div(
    style=dict(margin='20px'),
    children=[
        # Page heading.
        html.Div(children=[
            html.H1(
                "Fraud Detection Dashboard - Day of Week Analysis",
                style=dict(textAlign='center', marginBottom='30px', color='#2c3e50'),
            ),
        ]),

        # Placeholder for the summary cards (an Output of the callback below).
        html.Div(id='stats-cards', style=dict(marginBottom='30px')),

        # Control panel: chart selector on the left, weekday filter on the right.
        html.Div(children=[
            html.Div(
                style=dict(
                    backgroundColor='#f8f9fa',
                    padding='20px',
                    border='1px solid #dee2e6',
                    borderRadius='5px',
                    marginBottom='30px',
                ),
                children=[
                    html.H5("Visualization Controls", style=dict(marginBottom='15px')),
                    html.Div(children=[
                        html.Div(
                            style=dict(width='48%', display='inline-block'),
                            children=[
                                html.Label("Select Chart Type:", style=dict(marginBottom='5px')),
                                dcc.Dropdown(
                                    id='chart-type-dropdown',
                                    options=[
                                        dict(label=caption, value=key)
                                        for caption, key in (
                                            ('Fraud Histogram', 'histogram'),
                                            ('Fraud Rate Line Chart', 'line'),
                                            ('Amount Analysis', 'amount'),
                                            ('Fraud Heatmap', 'heatmap'),
                                        )
                                    ],
                                    value='histogram',
                                ),
                            ],
                        ),
                        html.Div(
                            style=dict(width='48%', float='right', display='inline-block'),
                            children=[
                                html.Label("Day Filter:", style=dict(marginBottom='5px')),
                                dcc.Dropdown(
                                    id='day-filter',
                                    # "All Days" sentinel first, then one entry per weekday.
                                    options=[
                                        dict(label='All Days', value='all'),
                                        *(dict(label=day, value=day) for day in day_order),
                                    ],
                                    value=['all'],
                                    multi=True,
                                ),
                            ],
                        ),
                    ]),
                ],
            ),
        ]),

        # Main figure.
        html.Div(
            style=dict(marginBottom='30px'),
            children=[dcc.Graph(id='main-chart')],
        ),

        # Per-day statistics table container.
        html.Div(children=[
            html.H4("Daily Statistics Table", style=dict(marginBottom='15px')),
            html.Div(id='stats-table-container'),
        ]),
    ],
)

@app.callback(
    [Output('main-chart', 'figure'),
     Output('stats-cards', 'children'),
     Output('stats-table-container', 'children')],
    [Input('chart-type-dropdown', 'value'),
     Input('day-filter', 'value')]
)
def update_dashboard(chart_type, day_filter):
    """Rebuild the chart, summary cards and statistics table for the current controls.

    Args:
        chart_type: Value of the chart dropdown ('histogram', 'line', 'amount'
            or 'heatmap'). Any other value, including ``None`` from a cleared
            dropdown, falls back to the histogram.
        day_filter: Value of the multi-select day dropdown. ``['all']``, an
            empty list or a non-list value means "no filtering".

    Returns:
        A ``(figure, stats_cards, stats_table)`` tuple matching the three
        callback outputs. When the filter leaves no rows, an empty titled
        figure and two "No data available" placeholders are returned.
    """
    # The filtered frame is published as a module-level name, as before;
    # NOTE(review): no reader of this global is visible here - confirm it is still needed.
    global filtered_df

    filtered_df = df.copy()

    if day_filter != ['all'] and isinstance(day_filter, list) and len(day_filter) > 0:
        filtered_df = filtered_df[filtered_df['day_of_week'].isin(day_filter)]

    if filtered_df.empty:
        empty_fig = go.Figure()
        empty_fig.update_layout(title="No data available for selected filters")
        return empty_fig, html.Div("No data available"), html.Div("No data available")

    # Per-weekday aggregates that feed both the cards and the table.
    filtered_daily_stats = filtered_df.groupby('day_of_week').agg({
        'is_fraud': ['count', 'sum'],
        'amt': ['mean', 'sum']
    }).reset_index()

    filtered_daily_stats.columns = ['Day', 'Total_Transactions', 'Total_Frauds', 'Avg_Amount', 'Total_Amount']
    filtered_daily_stats['Fraud_Rate'] = (filtered_daily_stats['Total_Frauds'] / filtered_daily_stats['Total_Transactions'] * 100).round(2)

    # Ordered categorical so the table rows follow day_order rather than the alphabet.
    filtered_daily_stats['Day'] = pd.Categorical(filtered_daily_stats['Day'], categories=day_order, ordered=True)
    filtered_daily_stats = filtered_daily_stats.sort_values('Day')

    chart_builders = {
        'histogram': create_fraud_histogram,
        'line': create_fraud_rate_chart,
        'amount': create_amount_analysis,
        'heatmap': create_heatmap,
    }
    # The chart dropdown does not set clearable=False, so chart_type can arrive
    # as None; default to the histogram instead of leaving `fig` unbound.
    build_chart = chart_builders.get(chart_type, create_fraud_histogram)
    fig = build_chart(filtered_df)

    total_transactions = int(filtered_daily_stats['Total_Transactions'].sum())
    total_frauds = int(filtered_daily_stats['Total_Frauds'].sum())
    overall_fraud_rate = (total_frauds / total_transactions * 100) if total_transactions > 0 else 0
    # Mean over all filtered transactions. Averaging the per-day means would
    # weight a quiet day the same as a busy one and misreport the figure.
    avg_amount = filtered_df['amt'].mean()

    def stat_card(value_text, caption, value_color):
        """Return one summary card: a coloured headline value above a caption."""
        return html.Div([
            html.Div([
                html.H4(value_text,
                        style={'color': value_color, 'margin': '0'}),
                html.P(caption, style={'margin': '5px 0'})
            ], style={'backgroundColor': '#f8f9fa', 'padding': '20px', 'border': '1px solid #dee2e6',
                      'borderRadius': '5px', 'textAlign': 'center'})
        ], style={'width': '23%', 'display': 'inline-block', 'margin': '1%'})

    stats_cards = html.Div([
        stat_card(f"{total_transactions:,}", "Total Transactions", '#3498db'),
        stat_card(f"{total_frauds:,}", "Total Frauds", '#e74c3c'),
        stat_card(f"{overall_fraud_rate:.2f}%", "Overall Fraud Rate", '#f39c12'),
        stat_card(f"${avg_amount:.2f}", "Avg Transaction Amount", '#27ae60'),
    ])

    # Day with the highest fraud rate; its table row gets highlighted below.
    max_fraud_rate_day = filtered_daily_stats.loc[filtered_daily_stats['Fraud_Rate'].idxmax(), 'Day'] if not filtered_daily_stats.empty else None

    stats_table = dash_table.DataTable(
        id='stats-table',
        columns=[
            {'name': 'Day', 'id': 'Day'},
            {'name': 'Total Transactions', 'id': 'Total_Transactions', 'type': 'numeric', 'format': {'specifier': ','}},
            {'name': 'Total Frauds', 'id': 'Total_Frauds', 'type': 'numeric', 'format': {'specifier': ','}},
            {'name': 'Fraud Rate (%)', 'id': 'Fraud_Rate', 'type': 'numeric', 'format': {'specifier': '.2f'}},
            {'name': 'Avg Amount ($)', 'id': 'Avg_Amount', 'type': 'numeric', 'format': {'specifier': '.2f'}},
            {'name': 'Total Amount ($)', 'id': 'Total_Amount', 'type': 'numeric', 'format': {'specifier': ',.2f'}}
        ],
        data=filtered_daily_stats.to_dict('records'),
        style_cell={'textAlign': 'center'},
        style_header={'backgroundColor': 'rgb(230, 230, 230)', 'fontWeight': 'bold'},
        style_data_conditional=[
            {
                'if': {'filter_query': f'{{Day}} = {max_fraud_rate_day}'},
                'backgroundColor': '#ffebee',
                'color': 'black',
            }
        ] if max_fraud_rate_day is not None else []
    )

    return fig, stats_cards, stats_table

# Start the day-of-week dashboard's server on port 8055 with debug mode on.
if __name__ == "__main__":
    app.run(port=8055, debug=True)

## Interactive Fraud Trends by Month of the Year  

In [141]:
# Derive a month-name column and chart transaction counts per month by fraud flag.
df['transaction_date'] = pd.to_datetime(df['trans_date_trans_time'])

# Month number -> English month name, in calendar order.
month_names = dict(enumerate(
    ['January', 'February', 'March', 'April', 'May', 'June',
     'July', 'August', 'September', 'October', 'November', 'December'],
    start=1,
))
df['month'] = df['transaction_date'].dt.month.map(month_names)

fig_month = px.histogram(
    df,
    x='month',
    color='is_fraud',
    barmode='group',
    opacity=0.8,
    height=600,
    title='Fraud Occurrence by Month of the Year',
    labels={'month': 'Month', 'count': 'Number of Transactions'},
    color_discrete_map={0: 'blue', 1: 'orange'},
    # Keep the x-axis in calendar order.
    category_orders={'month': list(month_names.values())},
)

# Minimal single-graph app for the monthly view.
app = dash.Dash(__name__)
app.layout = html.Div(children=[
    html.H1("Fraud Detection - Monthly Analysis", style=dict(textAlign='center')),
    dcc.Graph(id='fraud-by-month', figure=fig_month),
])

if __name__ == "__main__":
    app.run(port=8056, debug=True)

In [143]:
# Calendar features derived from the raw timestamp string.
df['transaction_date'] = pd.to_datetime(df['trans_date_trans_time'])
df['month'] = df['transaction_date'].dt.month            # numeric month, 1-12
df['day_of_week'] = df['transaction_date'].dt.day_name()
df['hour'] = df['transaction_date'].dt.hour
df['date_only'] = df['transaction_date'].dt.date

# Month number -> English month name, in calendar order.
month_names = dict(enumerate(
    ['January', 'February', 'March', 'April', 'May', 'June',
     'July', 'August', 'September', 'October', 'November', 'December'],
    start=1,
))
df['month_name'] = df['month'].map(month_names)

# Dataset-wide headline numbers.
total_transactions = df.shape[0]
fraud_transactions = int(df['is_fraud'].eq(1).sum())
fraud_rate = (fraud_transactions / total_transactions) * 100
legitimate_transactions = total_transactions - fraud_transactions

# One row per month: counts, fraud rate in percent, and amount statistics.
monthly_stats = (
    df.groupby(['month', 'month_name'])
    .agg(
        total_transactions=('is_fraud', 'count'),
        fraud_count=('is_fraud', 'sum'),
        fraud_rate=('is_fraud', 'mean'),
        avg_amount=('amt', 'mean'),
        total_amount=('amt', 'sum'),
        amount_std=('amt', 'std'),
    )
    .reset_index()
    .rename(columns={'month': 'month_num', 'month_name': 'month'})
    .assign(fraud_rate=lambda stats: stats['fraud_rate'] * 100)
    .sort_values('month_num')
    .reset_index(drop=True)
)

# Monthly fraud dashboard: Bootstrap-themed Dash app and its static layout.
# Components that callbacks fill in are identified by their `id`.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP, dbc.icons.BOOTSTRAP])


def _bi_icon(name):
    """Return a Bootstrap-icon <i> element (``bi bi-<name>``) with right margin."""
    return html.I(className=f"bi bi-{name} me-2")


def _kpi_card(icon, title, value_id, text_class, value_style=None):
    """Return a quarter-width column holding a headline-number card.

    The ``html.H2`` with id ``value_id`` is left empty here so a callback can
    populate it.
    """
    return dbc.Col([
        dbc.Card([
            dbc.CardBody([
                html.H4([_bi_icon(icon), title], className="card-title text-center"),
                html.H2(id=value_id, className=f"text-center {text_class}",
                        style=value_style or {'fontWeight': 'bold'})
            ])
        ], color="light", outline=True)
    ], width=3)


def _chart_card(icon, title, graph_id, width):
    """Return a column of the given grid width holding a titled card around a ``dcc.Graph``."""
    return dbc.Col([
        dbc.Card([
            dbc.CardHeader([
                html.H4([_bi_icon(icon), title], className="mb-0 text-center")
            ]),
            dbc.CardBody([
                dcc.Graph(id=graph_id)
            ])
        ])
    ], width=width)


app.layout = dbc.Container([
    # Header
    dbc.Row([
        dbc.Col([
            html.H1([
                _bi_icon("shield-exclamation"),
                "Monthly Fraud Detection Dashboard"
            ], className="text-center mb-4 mt-3", style={'color': '#2E86AB', 'fontWeight': 'bold'})
        ], width=12)
    ]),

    # Controls: date range picker and main-chart type selector.
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H5([
                        _bi_icon("sliders"),
                        "Date Range Filter"
                    ], className="mb-0")
                ]),
                dbc.CardBody([
                    dbc.Row([
                        dbc.Col([
                            html.Label("Select Date Range:", className="fw-bold"),
                            dcc.DatePickerRange(
                                id='date-picker-range',
                                start_date=df['transaction_date'].min(),
                                end_date=df['transaction_date'].max(),
                                display_format='YYYY-MM-DD',
                                style={'width': '100%'}
                            )
                        ], width=6),
                        dbc.Col([
                            html.Label("Chart Type:", className="fw-bold"),
                            dcc.Dropdown(
                                id='chart-type-dropdown',
                                options=[
                                    {'label': 'Bar Chart - Grouped', 'value': 'bar_grouped'},
                                    {'label': 'Bar Chart - Stacked', 'value': 'bar_stacked'},
                                    {'label': 'Line Chart - Fraud Rate', 'value': 'line_rate'},
                                    {'label': 'Area Chart - Transactions', 'value': 'area_trans'}
                                ],
                                value='bar_grouped',
                                clearable=False
                            )
                        ], width=6)
                    ])
                ])
            ])
        ], width=12)
    ], className="mb-4"),

    # Headline numbers.
    dbc.Row([
        _kpi_card("graph-up", "Total Transactions", "total-transactions", "text-primary"),
        _kpi_card("exclamation-triangle", "Fraudulent", "fraud-transactions", "text-danger"),
        _kpi_card("percent", "Fraud Rate", "fraud-rate", "text-warning"),
        # Month names are longer than the numbers in the other cards, hence the smaller font.
        _kpi_card("calendar3", "Peak Fraud Month", "peak-month", "text-info",
                  value_style={'fontWeight': 'bold', 'fontSize': '1.2rem'}),
    ], className="mb-4"),

    # Main chart (2/3 width) next to the overall pie chart (1/3 width).
    dbc.Row([
        _chart_card("bar-chart", "Monthly Fraud Analysis", 'main-chart', 8),
        _chart_card("pie-chart", "Overall Distribution", 'fraud-pie-chart', 4),
    ], className="mb-4"),

    dbc.Row([
        _chart_card("graph-up-arrow", "Monthly Fraud Rate Trend", 'fraud-rate-chart', 6),
        _chart_card("currency-dollar", "Monthly Transaction Amounts", 'amount-chart', 6),
    ], className="mb-4"),

    dbc.Row([
        _chart_card("speedometer2", "Fraud Rate Gauge by Month", 'gauge-chart', 6),
        _chart_card("activity", "Monthly Transaction Volume Heatmap", 'heatmap-chart', 6),
    ], className="mb-4"),

    # Monthly statistics table; its `data` is supplied by a callback.
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H4([
                        _bi_icon("table"),
                        "Monthly Statistics Summary"
                    ], className="mb-0")
                ]),
                dbc.CardBody([
                    dash_table.DataTable(
                        id='monthly-stats-table',
                        columns=[
                            {'name': 'Month', 'id': 'month'},
                            {'name': 'Total Transactions', 'id': 'total_transactions', 'type': 'numeric', 'format': {'specifier': ','}},
                            {'name': 'Fraud Count', 'id': 'fraud_count', 'type': 'numeric', 'format': {'specifier': ','}},
                            {'name': 'Fraud Rate (%)', 'id': 'fraud_rate', 'type': 'numeric', 'format': {'specifier': '.2f'}},
                            {'name': 'Avg Amount ($)', 'id': 'avg_amount', 'type': 'numeric', 'format': {'specifier': ',.2f'}},
                            {'name': 'Total Amount ($)', 'id': 'total_amount', 'type': 'numeric', 'format': {'specifier': ',.0f'}},
                            {'name': 'Amount Std Dev', 'id': 'amount_std', 'type': 'numeric', 'format': {'specifier': ',.2f'}}
                        ],
                        style_cell={'textAlign': 'center', 'padding': '10px', 'font-size': '12px'},
                        style_header={'backgroundColor': '#2E86AB', 'color': 'white', 'fontWeight': 'bold'},
                        # Later rules take priority over earlier ones, so the
                        # broader "> 3" rule must come BEFORE the stricter "> 5"
                        # rule; otherwise the red highlight is always overridden.
                        style_data_conditional=[
                            {
                                'if': {'row_index': 'odd'},
                                'backgroundColor': 'rgb(248, 248, 248)'
                            },
                            {
                                'if': {'filter_query': '{fraud_rate} > 3', 'column_id': 'fraud_rate'},
                                'backgroundColor': '#fff3cd',
                                'color': 'orange'
                            },
                            {
                                'if': {'filter_query': '{fraud_rate} > 5', 'column_id': 'fraud_rate'},
                                'backgroundColor': '#ffcccc',
                                'color': 'red',
                                'fontWeight': 'bold'
                            }
                        ],
                        sort_action="native",
                        style_table={'overflowX': 'scroll'}
                    )
                ])
            ])
        ], width=12)
    ], className="mb-4"),

    # Textual insights; children are supplied by a callback.
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H4([
                        _bi_icon("lightbulb"),
                        "Monthly Analysis Insights"
                    ], className="mb-0")
                ]),
                dbc.CardBody([
                    html.Div(id="insights-content")
                ])
            ])
        ], width=12)
    ], className="mb-4"),

    # Footer
    dbc.Row([
        dbc.Col([
            html.Hr(),
            html.P([
                _bi_icon("info-circle"),
                "Monthly Fraud Detection System - Advanced Analytics Dashboard"
            ], className="text-center text-muted")
        ], width=12)
    ])

], fluid=True)

@app.callback(
    [Output('total-transactions', 'children'),
     Output('fraud-transactions', 'children'),
     Output('fraud-rate', 'children'),
     Output('peak-month', 'children'),
     Output('main-chart', 'figure'),
     Output('fraud-pie-chart', 'figure'),
     Output('fraud-rate-chart', 'figure'),
     Output('amount-chart', 'figure'),
     Output('gauge-chart', 'figure'),
     Output('heatmap-chart', 'figure'),
     Output('monthly-stats-table', 'data'),
     Output('insights-content', 'children')],
    [Input('date-picker-range', 'start_date'),
     Input('date-picker-range', 'end_date'),
     Input('chart-type-dropdown', 'value')]
)
def update_dashboard(start_date, end_date, chart_type):
    """Recompute every output of the monthly dashboard for the chosen period.

    Args:
        start_date: Start of the date range as delivered by the date picker
            (a date/datetime string), or ``None`` when cleared.
        end_date: End of the date range, same format; the whole end day is
            included. ``None`` when cleared.
        chart_type: 'bar_grouped', 'bar_stacked' or 'line_rate'; any other
            value produces the area chart.

    Returns:
        A 12-tuple in callback-output order: three formatted headline strings,
        the peak-fraud month name, six figures (main, pie, rate, amount,
        gauge, heatmap), the table records and the list of insight components.
    """
    # A cleared picker sends None; fall back to the data's own extent so the
    # comparison below never sees a missing bound.
    first_day = pd.to_datetime(start_date) if start_date else df['transaction_date'].min()
    last_day = pd.to_datetime(end_date) if end_date else df['transaction_date'].max()

    # Compare against whole days. A date string such as '2019-03-31' parses to
    # midnight, so a plain `<= end_date` would drop every transaction made
    # later on the selected end day.
    in_range = (
        (df['transaction_date'] >= first_day.normalize())
        & (df['transaction_date'] < last_day.normalize() + pd.Timedelta(days=1))
    )
    filtered_df = df[in_range]

    if filtered_df.empty:
        # Nothing to plot: return placeholders instead of letting the chart
        # builders fail on empty input.
        blank_fig = go.Figure()
        blank_fig.update_layout(title="No data available for the selected period",
                                paper_bgcolor='rgba(0,0,0,0)')
        no_data = [dbc.Alert("No data available for the selected period.", color="warning")]
        return ("0", "0", "0.00%", "N/A",
                blank_fig, blank_fig, blank_fig, blank_fig, blank_fig, blank_fig,
                [], no_data)

    total_trans = len(filtered_df)
    fraud_trans = len(filtered_df[filtered_df['is_fraud'] == 1])
    fraud_rt = (fraud_trans / total_trans) * 100 if total_trans > 0 else 0

    # One row per month present in the selection.
    monthly_stats_filtered = filtered_df.groupby(['month', 'month_name']).agg({
        'is_fraud': ['count', 'sum', 'mean'],
        'amt': ['mean', 'sum', 'std']
    }).reset_index()
    monthly_stats_filtered.columns = ['month_num', 'month', 'total_transactions', 'fraud_count', 'fraud_rate',
                                      'avg_amount', 'total_amount', 'amount_std']
    monthly_stats_filtered['fraud_rate'] = monthly_stats_filtered['fraud_rate'] * 100
    # std is NaN for a month with a single transaction; show it as 0.
    monthly_stats_filtered = monthly_stats_filtered.fillna(0)
    monthly_stats_filtered = monthly_stats_filtered.sort_values('month_num').reset_index(drop=True)

    peak_month = monthly_stats_filtered.loc[monthly_stats_filtered['fraud_rate'].idxmax(), 'month'] if len(monthly_stats_filtered) > 0 else "N/A"

    # --- Main chart, chosen by the dropdown ---------------------------------
    if chart_type in ('bar_grouped', 'bar_stacked'):
        stacked = chart_type == 'bar_stacked'
        main_fig = px.histogram(
            filtered_df, x='month_name', color='is_fraud',
            category_orders={'month_name': list(month_names.values())},
            barmode='stack' if stacked else 'group', opacity=0.8,
            color_discrete_map={0: '#2E86AB', 1: '#FFA500'},
            title=("Monthly Transactions - Stacked by Fraud Status" if stacked
                   else "Monthly Transactions - Grouped by Fraud Status")
        )
    elif chart_type == 'line_rate':
        main_fig = px.line(
            monthly_stats_filtered, x='month', y='fraud_rate',
            markers=True, line_shape='linear',
            title="Monthly Fraud Rate Trend Line"
        )
        main_fig.update_traces(line_color='#FFA500', line_width=3, marker_size=8)
    else:
        main_fig = px.area(
            monthly_stats_filtered, x='month', y='total_transactions',
            title="Monthly Transaction Volume - Area Chart"
        )
        main_fig.update_traces(fill='tonexty', fillcolor='rgba(46, 134, 171, 0.3)', line_color='#2E86AB')

    main_fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)')

    # --- Pie chart ----------------------------------------------------------
    # value_counts() orders by frequency and omits absent classes, so pin the
    # order to [0, 1] to keep each count paired with the right label.
    fraud_counts = filtered_df['is_fraud'].value_counts().reindex([0, 1], fill_value=0)
    pie_labels = ['Legitimate', 'Fraudulent']
    pie_fig = px.pie(values=fraud_counts.values, names=pie_labels,
                     # color_discrete_map is keyed on the `color` values.
                     color=pie_labels,
                     color_discrete_map={'Legitimate': '#2E86AB', 'Fraudulent': '#FFA500'},
                     title="Overall Fraud Distribution")
    pie_fig.update_layout(paper_bgcolor='rgba(0,0,0,0)')

    # --- Monthly fraud-rate bars -------------------------------------------
    rate_fig = px.bar(monthly_stats_filtered, x='month', y='fraud_rate',
                      title="Monthly Fraud Rate Comparison", color='fraud_rate',
                      color_continuous_scale='Blues')
    rate_fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)')

    # --- Monthly amount mean / std bars ------------------------------------
    amount_fig = px.bar(monthly_stats_filtered, x='month', y=['avg_amount', 'amount_std'],
                        title="Average Transaction Amount & Standard Deviation",
                        barmode='group')
    amount_fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)')

    # --- Gauge for the period's overall fraud rate --------------------------
    current_fraud_rate = fraud_rt
    gauge_fig = go.Figure(go.Indicator(
        mode="gauge+number+delta",
        value=current_fraud_rate,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Current Period Fraud Rate (%)"},
        delta={'reference': 3},
        gauge={'axis': {'range': [None, 10]},
               'bar': {'color': "darkred"},
               'steps': [
                   {'range': [0, 2], 'color': "lightgreen"},
                   {'range': [2, 5], 'color': "yellow"},
                   {'range': [5, 10], 'color': "red"}],
               'threshold': {'line': {'color': "red", 'width': 4},
                             'thickness': 0.75, 'value': 5}}))
    gauge_fig.update_layout(paper_bgcolor='rgba(0,0,0,0)')

    # --- Heatmap of the per-month statistics --------------------------------
    monthly_pivot = monthly_stats_filtered.set_index('month')[['total_transactions', 'fraud_count', 'fraud_rate']]

    # Calendar order, restricted to months that occur in the selection.
    month_order = [month_names[i] for i in range(1, 13) if month_names[i] in monthly_pivot.index]
    monthly_pivot = monthly_pivot.reindex(month_order)
    heatmap_fig = px.imshow(monthly_pivot.T,
                            title="Monthly Statistics Heatmap",
                            color_continuous_scale='RdYlBu_r',
                            aspect="auto")
    heatmap_fig.update_layout(paper_bgcolor='rgba(0,0,0,0)')

    table_data = monthly_stats_filtered.to_dict('records')

    insights = generate_monthly_insights(monthly_stats_filtered, fraud_rt)

    return (f"{total_trans:,}", f"{fraud_trans:,}", f"{fraud_rt:.2f}%",
            str(peak_month), main_fig, pie_fig, rate_fig, amount_fig,
            gauge_fig, heatmap_fig, table_data, insights)

def generate_monthly_insights(monthly_stats, overall_fraud_rate):
    """Build the components shown in the monthly insights panel.

    Args:
        monthly_stats: DataFrame with one row per month. Only the 'month',
            'fraud_rate' and 'fraud_count' columns are read here.
        overall_fraud_rate: Fraud rate for the selected period, on the same
            scale as the 2 / 3 / 5 thresholds used below.

    Returns:
        list: A single warning alert when ``monthly_stats`` is empty;
        otherwise the status, peak, lowest, volume and variability alerts
        followed by a divider, a heading and the static
        "Root Cause Analysis" card.
    """
    insights = []

    if len(monthly_stats) == 0:
        insights.append(dbc.Alert("No data available for the selected period.", color="warning"))
        return insights

    # Overall status banner.
    if overall_fraud_rate > 5:
        insights.append(dbc.Alert("🚨 Critical: Overall fraud rate exceeds 5%. Immediate action required!", color="danger"))
    elif overall_fraud_rate > 3:
        insights.append(dbc.Alert("⚠️ Warning: Elevated fraud rate detected. Enhanced monitoring recommended.", color="warning"))
    else:
        insights.append(dbc.Alert("✅ Good: Fraud rate is within acceptable range.", color="success"))

    fraud_rates = monthly_stats['fraud_rate']
    fraud_counts = monthly_stats['fraud_count']

    peak_month = monthly_stats.loc[fraud_rates.idxmax(), 'month']
    peak_rate = fraud_rates.max()
    insights.append(dbc.Alert(f"📈 Peak fraud activity: {peak_month} with {peak_rate:.2f}% fraud rate.", color="info"))

    lowest_month = monthly_stats.loc[fraud_rates.idxmin(), 'month']
    lowest_rate = fraud_rates.min()
    insights.append(dbc.Alert(f"📉 Lowest fraud activity: {lowest_month} with {lowest_rate:.2f}% fraud rate.", color="success"))

    high_fraud_month = monthly_stats.loc[fraud_counts.idxmax(), 'month']
    high_fraud_count = fraud_counts.max()
    insights.append(dbc.Alert(f"🚨 Highest fraud volume: {high_fraud_month} with {high_fraud_count:,} fraudulent transactions.", color="danger"))

    # std() is NaN with fewer than two months; NaN > 2 is False, which used
    # to be reported as "consistent patterns" even though nothing was compared.
    fraud_rate_std = fraud_rates.std()
    if pd.isna(fraud_rate_std):
        insights.append(dbc.Alert("📊 Not enough months in the selection to assess variability in fraud rates.", color="info"))
    elif fraud_rate_std > 2:
        insights.append(dbc.Alert("📊 High variability in monthly fraud rates detected. Investigate seasonal patterns.", color="warning"))
    else:
        insights.append(dbc.Alert("📊 Fraud rates show consistent patterns across months.", color="info"))

    insights.append(html.Hr())
    insights.append(html.H5([html.I(className="bi bi-search me-2"), "Root Cause Analysis"], style={'color': '#2E86AB', 'margin-top': '20px'}))

    # "Reducing ... to 2%" only makes sense when the rate is above 2%.
    # The dollar amount is a placeholder; no savings figure is computed here.
    if overall_fraud_rate > 2:
        savings_text = f"Potential savings: Reducing fraud rate from {overall_fraud_rate:.1f}% to 2% could save $XXX,XXX annually"
    else:
        savings_text = f"Potential savings: Fraud rate of {overall_fraud_rate:.1f}% is already at or below the 2% target; focus on sustaining it"

    causes_card = dbc.Card([
        dbc.CardBody([
            html.H6("🔍 Potential Fraud Causes:", className="text-danger fw-bold"),
            html.Ul([
                html.Li("Seasonal shopping patterns (February = Valentine's Day, December = Holidays)"),
                html.Li("Payment system vulnerabilities during high-traffic periods"),
                html.Li("Inadequate fraud detection rules for seasonal anomalies"),
                html.Li("Fraudster targeting of promotional campaigns and special events"),
                html.Li("Insufficient staff training during peak transaction periods"),
                html.Li("Outdated risk scoring models not adapted to seasonal behavior")
            ], className="mb-3"),

            html.H6("💡 Strategic Business Solutions:", className="text-success fw-bold"),
            html.Ul([
                html.Li([html.Strong("Immediate Actions (0-30 days):"),
                        html.Ul([
                            html.Li("Deploy additional fraud analysts during peak months"),
                            html.Li("Implement dynamic fraud thresholds based on seasonal patterns"),
                            html.Li("Enhance real-time monitoring for February and December")
                        ])]),
                html.Li([html.Strong("Medium-term Solutions (1-6 months):"),
                        html.Ul([
                            html.Li("Develop machine learning models incorporating seasonal features"),
                            html.Li("Create targeted customer education campaigns before peak seasons"),
                            html.Li("Implement advanced behavioral analytics for holiday shopping"),
                            html.Li("Establish partnerships with payment processors for enhanced screening")
                        ])]),
                html.Li([html.Strong("Long-term Strategy (6+ months):"),
                        html.Ul([
                            html.Li("Build predictive models to forecast monthly fraud risk"),
                            html.Li("Invest in AI-powered real-time decision engines"),
                            html.Li("Develop comprehensive fraud prevention ecosystem"),
                            html.Li("Create industry benchmarking and threat intelligence sharing")
                        ])])
            ], className="mb-3"),

            html.H6("📊 Business Impact & ROI:", className="text-info fw-bold"),
            html.Ul([
                html.Li(savings_text),
                html.Li("Improved customer trust and satisfaction scores"),
                html.Li("Reduced chargeback costs and operational overhead"),
                html.Li("Enhanced regulatory compliance and reduced legal risks"),
                html.Li("Competitive advantage through superior fraud prevention")
            ])
        ])
    ], color="light", className="mt-3")

    insights.append(causes_card)

    return insights

# Start the Dash app assigned to `app` above in debug mode on port 8057.
# Inside a notebook kernel __name__ is '__main__', so this runs when the cell executes.
if __name__ == '__main__':
    app.run(debug=True, port=8057)

## Geographical Distribution of Fraud

In [None]:
# Cap the working set used by the geographic dashboard.
max_samples = 15000
# Seeded so that Restart & Run All always keeps the same rows; an unseeded
# sample silently drops a different subset on every run.
sample_df = df.sample(max_samples, random_state=42) if len(df) > max_samples else df.copy()

# Per-state summary. Named aggregation ties each output column to its source
# column and function instead of relying on positional renaming.
geo_stats = (
    sample_df.groupby('state')
    .agg(
        total_trans=('is_fraud', 'count'),
        fraud_count=('is_fraud', 'sum'),
        fraud_rate=('is_fraud', 'mean'),
        avg_amount=('amt', 'mean'),
        total_amount=('amt', 'sum'),
        avg_lat=('lat', 'mean'),
        avg_long=('long', 'mean'),
    )
    .reset_index()
)
# Express the fraud share as a percentage.
geo_stats['fraud_rate'] = geo_stats['fraud_rate'] * 100

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP, dbc.icons.BOOTSTRAP])


def _geo_icon_title(icon, title):
    """Return heading children: a Bootstrap icon followed by the title text."""
    return [html.I(className=f"bi {icon} me-2"), title]


def _geo_control_col(label, control):
    """Wrap one filter control and its bold label in a quarter-width column."""
    return dbc.Col([html.Label(label, className="fw-bold"), control], width=3)


def _geo_kpi_col(icon, title, value_id, text_class, value_style):
    """Quarter-width KPI card whose H2 (id=value_id) is filled by the callback."""
    return dbc.Col([
        dbc.Card([
            dbc.CardBody([
                html.H4(_geo_icon_title(icon, title), className="card-title text-center"),
                html.H2(id=value_id, className=f"text-center {text_class}", style=value_style)
            ])
        ], color="light", outline=True)
    ], width=3)


def _geo_chart_col(icon, title, graph_id):
    """Half-width card holding a titled dcc.Graph."""
    return dbc.Col([
        dbc.Card([
            dbc.CardHeader([
                html.H4(_geo_icon_title(icon, title), className="mb-0 text-center")
            ]),
            dbc.CardBody([
                dcc.Graph(id=graph_id)
            ])
        ])
    ], width=6)


# Slider bounds are derived once from the data instead of on every use.
_geo_amt_min = df['amt'].min()
_geo_amt_max = df['amt'].max()

app.layout = dbc.Container([
    # Page title
    dbc.Row([
        dbc.Col([
            html.H1(
                _geo_icon_title("bi-geo-alt-fill", "Geographical Fraud Analysis Dashboard"),
                className="text-center mb-4 mt-3", style={'color': '#2E86AB', 'font-weight': 'bold'})
        ], width=12)
    ]),

    # Controls
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H5(_geo_icon_title("bi-sliders", "Map Controls & Filters"), className="mb-0")
                ]),
                dbc.CardBody([
                    dbc.Row([
                        _geo_control_col("Map Type:", dcc.Dropdown(
                            id='map-type-dropdown',
                            options=[
                                {'label': 'Scatter Plot - Individual Transactions', 'value': 'scatter'},
                                {'label': 'State-Level Heatmap', 'value': 'choropleth'},
                                {'label': 'Density Map - Fraud Hotspots', 'value': 'density'},
                                {'label': 'Bubble Map - Transaction Volume', 'value': 'bubble'}
                            ],
                            value='scatter',
                            clearable=False
                        )),
                        _geo_control_col("Sample Size:", dcc.Slider(
                            id='sample-size-slider',
                            min=1000,
                            max=min(15000, len(df)),
                            step=1000,
                            value=min(10000, len(df)),
                            # Integer division so marks read "1k", "3k", ... rather than "1.0k".
                            marks={i: f'{i // 1000}k' for i in range(1000, min(16000, len(df)+1), 2000)},
                            tooltip={"placement": "bottom", "always_visible": True}
                        )),
                        _geo_control_col("Fraud Filter:", dcc.Dropdown(
                            id='fraud-filter-dropdown',
                            options=[
                                {'label': 'All Transactions', 'value': 'all'},
                                {'label': 'Fraudulent Only', 'value': 'fraud_only'},
                                {'label': 'Legitimate Only', 'value': 'legit_only'}
                            ],
                            value='all',
                            clearable=False
                        )),
                        _geo_control_col("Amount Range ($):", dcc.RangeSlider(
                            id='amount-range-slider',
                            min=_geo_amt_min,
                            max=_geo_amt_max,
                            step=100,
                            marks={int(i): f'${int(i)}' for i in np.linspace(_geo_amt_min, _geo_amt_max, 6)},
                            # Initial selection: minimum up to the 75th percentile of 'amt'.
                            value=[_geo_amt_min, df['amt'].quantile(0.75)],
                            tooltip={"placement": "bottom", "always_visible": True}
                        ))
                    ])
                ])
            ])
        ], width=12)
    ], className="mb-4"),

    # KPI cards
    dbc.Row([
        _geo_kpi_col("bi-map", "States Analyzed", "states-count",
                     "text-primary", {'font-weight': 'bold'}),
        _geo_kpi_col("bi-exclamation-triangle-fill", "Highest Risk State", "highest-risk-state",
                     "text-danger", {'font-weight': 'bold', 'font-size': '1.5rem'}),
        _geo_kpi_col("bi-shield-check", "Safest State", "safest-state",
                     "text-success", {'font-weight': 'bold', 'font-size': '1.5rem'}),
        _geo_kpi_col("bi-graph-up", "Geographic Concentration", "geo-concentration",
                     "text-info", {'font-weight': 'bold'})
    ], className="mb-4"),

    # Map plus side panels
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H4(id="map-title", className="mb-0 text-center")
                ]),
                dbc.CardBody([
                    dcc.Loading(
                        dcc.Graph(id='geo-fraud-map', style={'height': '600px'}),
                        type="circle", color="#2E86AB"
                    )
                ])
            ])
        ], width=8),

        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H5(_geo_icon_title("bi-list-ol", "Top Risk States"), className="mb-0")
                ]),
                dbc.CardBody([
                    html.Div(id="state-rankings")
                ], style={'max-height': '300px', 'overflow-y': 'auto'})
            ], className="mb-3"),

            dbc.Card([
                dbc.CardHeader([
                    html.H5(_geo_icon_title("bi-lightbulb", "Geographic Insights"), className="mb-0")
                ]),
                dbc.CardBody([
                    html.Div(id="geographic-insights")
                ])
            ])
        ], width=4)
    ], className="mb-4"),

    # Secondary charts
    dbc.Row([
        _geo_chart_col("bi-bar-chart", "State-wise Fraud Analysis", 'state-fraud-chart'),
        _geo_chart_col("bi-scatter", "Fraud Rate vs Transaction Volume", 'scatter-analysis-chart')
    ], className="mb-4"),

    # Action plan
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H4(_geo_icon_title("bi-clipboard-check", "Geographic Risk Management Action Plan"),
                            className="mb-0")
                ]),
                dbc.CardBody([
                    html.Div(id="action-plan")
                ])
            ])
        ], width=12)
    ], className="mb-4")

], fluid=True)

@app.callback(
    [Output('states-count', 'children'),
     Output('highest-risk-state', 'children'),
     Output('safest-state', 'children'),
     Output('geo-concentration', 'children'),
     Output('geo-fraud-map', 'figure'),
     Output('map-title', 'children'),
     Output('state-rankings', 'children'),
     Output('geographic-insights', 'children'),
     Output('state-fraud-chart', 'figure'),
     Output('scatter-analysis-chart', 'figure'),
     Output('action-plan', 'children')],
    [Input('map-type-dropdown', 'value'),
     Input('sample-size-slider', 'value'),
     Input('fraud-filter-dropdown', 'value'),
     Input('amount-range-slider', 'value')]
)
def update_geographic_analysis(map_type, sample_size, fraud_filter, amount_range):
    """Recompute every output of the geographic dashboard from its four controls.

    Args:
        map_type: 'scatter', 'bubble' or 'density'; any other value draws
            the state choropleth.
        sample_size: Maximum number of transactions plotted individually.
        fraud_filter: 'fraud_only' or 'legit_only'; any other value keeps all
            transactions. It narrows only the plotted transactions; the
            per-state statistics always use every row in the amount range.
        amount_range: ``[low, high]`` inclusive bounds applied to 'amt'.

    Returns:
        tuple: Eleven values in the order of the ``Output`` list above.
    """
    # Boolean indexing already returns a new frame, so no explicit copies are needed.
    in_range = sample_df['amt'].between(amount_range[0], amount_range[1])
    amount_filtered_df = sample_df[in_range]

    display_df = amount_filtered_df
    if fraud_filter == 'fraud_only':
        display_df = display_df[display_df['is_fraud'] == 1]
    elif fraud_filter == 'legit_only':
        display_df = display_df[display_df['is_fraud'] == 0]

    if len(display_df) > sample_size:
        # Seeded so the same control settings always draw the same points.
        display_df = display_df.sample(sample_size, random_state=42)

    geo_stats_filtered = (
        amount_filtered_df.groupby('state')
        .agg(
            total_trans=('is_fraud', 'count'),
            fraud_count=('is_fraud', 'sum'),
            fraud_rate=('is_fraud', 'mean'),
            avg_amount=('amt', 'mean'),
            total_amount=('amt', 'sum'),
            avg_lat=('lat', 'mean'),
            avg_long=('long', 'mean'),
        )
        .reset_index()
    )
    geo_stats_filtered['fraud_rate'] = geo_stats_filtered['fraud_rate'] * 100
    geo_stats_filtered = geo_stats_filtered.sort_values('fraud_rate', ascending=False)

    filter_labels = {
        'fraud_only': ('fraudulent', "🚨 Fraudulent Transactions Only"),
        'legit_only': ('legitimate', "✅ Legitimate Transactions Only"),
    }
    noun, filter_info = filter_labels.get(fraud_filter, ('total', "📊 All Transactions"))
    display_stats_text = f"Showing {len(display_df):,} {noun} transactions"

    # KPI values; the frame is sorted by fraud rate, highest first.
    states_count = len(geo_stats_filtered)
    highest_risk = geo_stats_filtered.iloc[0]['state'] if states_count > 0 else "N/A"
    safest_state = geo_stats_filtered.iloc[-1]['state'] if states_count > 0 else "N/A"

    # Coefficient of variation of per-state fraud counts.
    fraud_counts = geo_stats_filtered['fraud_count'].values
    if len(fraud_counts) > 0 and np.mean(fraud_counts) > 0:
        geo_concentration = f"{np.std(fraud_counts)/np.mean(fraud_counts):.2f}"
    else:
        geo_concentration = "N/A"

    if map_type == 'scatter':
        # Plotly Express treats a numeric colour column as continuous and then
        # ignores color_discrete_map; casting to str makes the two classes discrete.
        scatter_df = display_df.assign(is_fraud=display_df['is_fraud'].astype(str))
        map_fig = px.scatter(
            scatter_df, x='long', y='lat', color='is_fraud',
            title=f'Individual Transaction Locations - {filter_info}',
            opacity=0.6, size='amt',
            color_discrete_map={'0': '#2E86AB', '1': '#F24236'},
            hover_data=['state', 'amt']
        )
        map_title = f"Scatter Plot - {display_stats_text}"

    elif map_type == 'bubble':
        map_fig = px.scatter(
            geo_stats_filtered, x='avg_long', y='avg_lat',
            size='total_trans', color='fraud_rate',
            hover_data=['state', 'fraud_count'],
            title=f'State-wise Transaction Volume & Fraud Rate - {filter_info}',
            color_continuous_scale='Reds'
        )
        map_title = f"Bubble Map - State Statistics ({filter_info})"

    elif map_type == 'density':
        # Only the title depends on the filter; the data is always display_df.
        density_titles = {
            'legit_only': 'Legitimate Transaction Density',
            'fraud_only': 'Fraudulent Transaction Density',
        }
        density_title = density_titles.get(fraud_filter, 'Transaction Density (All)')

        map_fig = px.density_mapbox(
            display_df,
            lat='lat', lon='long', z='amt',
            radius=10, center=dict(lat=39.5, lon=-98.35), zoom=3,
            mapbox_style="open-street-map",
            title=f'{density_title} - {filter_info}'
        )
        map_title = f"Density Map - {display_stats_text}"

    else:
        map_fig = px.choropleth(
            geo_stats_filtered, locations='state',
            color='fraud_rate', locationmode='USA-states',
            scope="usa", title=f'State-wise Fraud Rate - {filter_info}',
            color_continuous_scale='Reds'
        )
        map_title = f"Choropleth - State Fraud Rates ({filter_info})"

    map_fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)')

    top_10_states = geo_stats_filtered.head(10)

    # itertuples keeps each column's dtype, so fraud_count prints as an integer.
    rankings = []
    for row in top_10_states.itertuples(index=False):
        color = "danger" if row.fraud_rate > 5 else "warning" if row.fraud_rate > 2 else "success"
        rankings.append(
            dbc.ListGroupItem([
                html.Div([
                    html.Strong(f"{row.state}: {row.fraud_rate:.1f}%"),
                    html.Small(f" ({row.fraud_count} frauds)", className="text-muted")
                ])
            ], color=color, className="d-flex justify-content-between align-items-center")
        )

    rankings_component = dbc.ListGroup(rankings)

    insights = generate_geographic_insights(geo_stats_filtered, filter_info, display_stats_text)

    state_chart = px.bar(
        top_10_states, x='state', y='fraud_rate',
        title=f'Top 10 States by Fraud Rate - {filter_info}',
        color='fraud_rate', color_continuous_scale='Reds'
    )
    state_chart.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)')

    scatter_chart = px.scatter(
        geo_stats_filtered, x='total_trans', y='fraud_rate',
        size='fraud_count', hover_data=['state'],
        title=f'Fraud Rate vs Transaction Volume - {filter_info}',
        trendline="ols"
    )
    scatter_chart.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)')

    action_plan = generate_action_plan(geo_stats_filtered, filter_info)

    return (states_count, highest_risk, safest_state, geo_concentration,
            map_fig, map_title, rankings_component, insights,
            state_chart, scatter_chart, action_plan)

def generate_geographic_insights(geo_stats, filter_info, display_stats):
    """Summarise the per-state statistics as a list of alert components.

    Args:
        geo_stats: Per-state DataFrame; only its length and the 'fraud_rate'
            column are used.
        filter_info: Label of the active fraud filter, echoed in the first alert.
        display_stats: Text describing what is displayed, echoed in the first alert.

    Returns:
        list: One warning alert when ``geo_stats`` has no rows, otherwise
        four alerts (filter summary, above-average count, state count and a
        fixed risk-concentration note).
    """
    state_total = len(geo_stats)
    if state_total == 0:
        return [dbc.Alert("No data available for analysis.", color="warning")]

    avg_fraud_rate = geo_stats['fraud_rate'].mean()
    # Count the states whose rate is strictly above the mean.
    above_average = int((geo_stats['fraud_rate'] > avg_fraud_rate).sum())

    return [
        dbc.Alert(f"📍 Current Filter: {filter_info} | {display_stats}", color="primary"),
        dbc.Alert(f"📊 {above_average} states above average fraud rate ({avg_fraud_rate:.1f}%)", color="info"),
        dbc.Alert(f"🏴 Geographic spread: {state_total} states analyzed", color="primary"),
        dbc.Alert("⚠️ Risk concentration: Top 3 states show significant patterns", color="warning"),
    ]

def generate_action_plan(geo_stats, filter_info):
    """Render the geographic action plan card.

    Args:
        geo_stats: Per-state DataFrame with 'state' and 'fraud_rate' columns.
        filter_info: Label of the active fraud filter, shown at the top of the card.

    Returns:
        A one-element list holding a warning alert when ``geo_stats`` has no
        rows; otherwise a ``dbc.Card`` whose first bullet names up to three
        states with a fraud rate above 5.
    """
    if not len(geo_stats):
        return [dbc.Alert("No data available for action planning.", color="warning")]

    flagged_states = geo_stats.loc[geo_stats['fraud_rate'] > 5, 'state'].tolist()
    if flagged_states:
        monitoring_targets = ', '.join(flagged_states[:3])
    else:
        monitoring_targets = 'No high-risk states identified'

    def bullets(lines):
        # One <li> per line, wrapped in a <ul>.
        return html.Ul([html.Li(line) for line in lines])

    immediate = bullets([
        f"Deploy additional fraud monitoring in: {monitoring_targets}",
        "Implement state-specific transaction limits and velocity checks",
        "Enhance merchant verification in high-risk geographic areas",
    ])
    medium_term = bullets([
        "Develop geographic risk scoring models",
        "Create state-specific fraud prevention campaigns",
        "Establish partnerships with local law enforcement in high-risk areas",
        "Implement geofencing alerts for suspicious location patterns",
    ])
    long_term = bullets([
        "Build comprehensive geographic fraud intelligence platform",
        "Develop cross-state fraud pattern analysis capabilities",
        "Create predictive models for emerging geographic fraud trends",
        "Establish industry-wide geographic threat sharing network",
    ])

    card_body = dbc.CardBody([
        dbc.Alert(f"Analysis based on: {filter_info}", color="info", className="mb-3"),
        html.H6("🎯 Immediate Actions (0-30 days):", className="text-danger fw-bold"),
        immediate,
        html.H6("📊 Medium-term Strategy (1-6 months):", className="text-warning fw-bold"),
        medium_term,
        html.H6("🚀 Long-term Initiatives (6+ months):", className="text-success fw-bold"),
        long_term,
    ])

    return dbc.Card([card_body], color="light")

# Start the Dash app assigned to `app` above in debug mode on port 8058.
# Inside a notebook kernel __name__ is '__main__', so this runs when the cell executes.
if __name__ == '__main__':
    app.run(debug=True, port=8058)

## Interactive State Fraud Rate Map

In [109]:
# Per-state fraud share ('fraud_ratio', 0-1), transaction count, and the share
# expressed as a percentage ('fraud_rate').
fraud_by_state = (
    df.groupby('state')['is_fraud']
    .agg(fraud_ratio='mean', count='count')
    .reset_index()
)
fraud_by_state['fraud_rate'] = fraud_by_state['fraud_ratio'] * 100

# Lookup table keyed by two-letter state code (50 states plus DC): one
# latitude/longitude point and a display name per state. The cell below maps
# these onto fraud_by_state, and the map callback places text labels at them.
# NOTE(review): the points look like approximate state centres - confirm the source.
state_coords = {
    'AL': {'lat': 32.806671, 'lon': -86.791130, 'name': 'Alabama'},
    'AK': {'lat': 61.570716, 'lon': -152.404419, 'name': 'Alaska'},
    'AZ': {'lat': 33.729759, 'lon': -111.431221, 'name': 'Arizona'},
    'AR': {'lat': 34.969704, 'lon': -92.373123, 'name': 'Arkansas'},
    'CA': {'lat': 36.116203, 'lon': -119.681564, 'name': 'California'},
    'CO': {'lat': 39.059811, 'lon': -105.311104, 'name': 'Colorado'},
    'CT': {'lat': 41.597782, 'lon': -72.755371, 'name': 'Connecticut'},
    'DE': {'lat': 39.318523, 'lon': -75.507141, 'name': 'Delaware'},
    'FL': {'lat': 27.766279, 'lon': -81.686783, 'name': 'Florida'},
    'GA': {'lat': 33.040619, 'lon': -83.643074, 'name': 'Georgia'},
    'HI': {'lat': 21.094318, 'lon': -157.498337, 'name': 'Hawaii'},
    'ID': {'lat': 44.240459, 'lon': -114.478828, 'name': 'Idaho'},
    'IL': {'lat': 40.349457, 'lon': -88.986137, 'name': 'Illinois'},
    'IN': {'lat': 39.849426, 'lon': -86.258278, 'name': 'Indiana'},
    'IA': {'lat': 42.011539, 'lon': -93.210526, 'name': 'Iowa'},
    'KS': {'lat': 38.526600, 'lon': -96.726486, 'name': 'Kansas'},
    'KY': {'lat': 37.668140, 'lon': -84.670067, 'name': 'Kentucky'},
    'LA': {'lat': 31.169546, 'lon': -91.867805, 'name': 'Louisiana'},
    'ME': {'lat': 44.693947, 'lon': -69.381927, 'name': 'Maine'},
    'MD': {'lat': 39.063946, 'lon': -76.802101, 'name': 'Maryland'},
    'MA': {'lat': 42.230171, 'lon': -71.530106, 'name': 'Massachusetts'},
    'MI': {'lat': 43.326618, 'lon': -84.536095, 'name': 'Michigan'},
    'MN': {'lat': 45.694454, 'lon': -93.900192, 'name': 'Minnesota'},
    'MS': {'lat': 32.741646, 'lon': -89.678696, 'name': 'Mississippi'},
    'MO': {'lat': 38.572954, 'lon': -92.189283, 'name': 'Missouri'},
    'MT': {'lat': 47.052952, 'lon': -110.454353, 'name': 'Montana'},
    'NE': {'lat': 41.125370, 'lon': -98.268082, 'name': 'Nebraska'},
    'NV': {'lat': 38.313515, 'lon': -117.055374, 'name': 'Nevada'},
    'NH': {'lat': 43.452492, 'lon': -71.563896, 'name': 'New Hampshire'},
    'NJ': {'lat': 40.298904, 'lon': -74.521011, 'name': 'New Jersey'},
    'NM': {'lat': 34.840515, 'lon': -106.248482, 'name': 'New Mexico'},
    'NY': {'lat': 42.165726, 'lon': -74.948051, 'name': 'New York'},
    'NC': {'lat': 35.630066, 'lon': -79.806419, 'name': 'North Carolina'},
    'ND': {'lat': 47.528912, 'lon': -99.784012, 'name': 'North Dakota'},
    'OH': {'lat': 40.388783, 'lon': -82.764915, 'name': 'Ohio'},
    'OK': {'lat': 35.565342, 'lon': -96.928917, 'name': 'Oklahoma'},
    'OR': {'lat': 44.931109, 'lon': -120.767178, 'name': 'Oregon'},
    'PA': {'lat': 40.590752, 'lon': -77.209755, 'name': 'Pennsylvania'},
    'RI': {'lat': 41.680893, 'lon': -71.51178, 'name': 'Rhode Island'},
    'SC': {'lat': 33.856892, 'lon': -80.945007, 'name': 'South Carolina'},
    'SD': {'lat': 44.299782, 'lon': -99.438828, 'name': 'South Dakota'},
    'TN': {'lat': 35.747845, 'lon': -86.692345, 'name': 'Tennessee'},
    'TX': {'lat': 31.054487, 'lon': -97.563461, 'name': 'Texas'},
    'UT': {'lat': 40.150032, 'lon': -111.862434, 'name': 'Utah'},
    'VT': {'lat': 44.045876, 'lon': -72.710686, 'name': 'Vermont'},
    'VA': {'lat': 37.769337, 'lon': -78.169968, 'name': 'Virginia'},
    'WA': {'lat': 47.400902, 'lon': -121.490494, 'name': 'Washington'},
    'WV': {'lat': 38.491226, 'lon': -80.954570, 'name': 'West Virginia'},
    'WI': {'lat': 44.268543, 'lon': -89.616508, 'name': 'Wisconsin'},
    'WY': {'lat': 42.755966, 'lon': -107.302490, 'name': 'Wyoming'},
    'DC': {'lat': 38.897438, 'lon': -77.026817, 'name': 'Washington DC'}
}

# Attach label coordinates and display names to a copy of the per-state stats.
# Codes missing from state_coords get no coordinates and keep the raw code as name.
fraud_by_state_coords = fraud_by_state.assign(
    lat=fraud_by_state['state'].map(lambda code: state_coords.get(code, {}).get('lat')),
    lon=fraud_by_state['state'].map(lambda code: state_coords.get(code, {}).get('lon')),
    state_name=fraud_by_state['state'].map(lambda code: state_coords.get(code, {}).get('name', code)),
)

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP, dbc.icons.BOOTSTRAP])


def _state_icon_title(icon, title):
    """Return heading children: a Bootstrap icon followed by the title text."""
    return [html.I(className=f"bi {icon} me-2"), title]


def _state_control_col(label, control):
    """Wrap one customization control and its bold label in a quarter-width column."""
    return dbc.Col([html.Label(label, className="fw-bold"), control], width=3)


def _state_kpi_col(icon, title, value, text_class):
    """Quarter-width KPI card showing a value computed once at layout time."""
    return dbc.Col([
        dbc.Card([
            dbc.CardBody([
                html.H4(_state_icon_title(icon, title), className="card-title text-center"),
                html.H2(value, className=f"text-center {text_class}", style={'font-weight': 'bold'})
            ])
        ], color="light", outline=True)
    ], width=3)


def _state_panel_col(icon, title, body, width, heading_class="mb-0"):
    """Card with an icon+title H4 header and a single body component."""
    return dbc.Col([
        dbc.Card([
            dbc.CardHeader([
                html.H4(_state_icon_title(icon, title), className=heading_class)
            ]),
            dbc.CardBody([body])
        ])
    ], width=width)


app.layout = dbc.Container([
    # Page title
    dbc.Row([
        dbc.Col([
            html.H1(
                _state_icon_title("bi-map-fill", "Enhanced State Fraud Analysis"),
                className="text-center mb-4 mt-3", style={'color': '#2E86AB', 'font-weight': 'bold'})
        ], width=12)
    ]),

    # Map customization controls
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.H5(_state_icon_title("bi-gear", "Map Customization"), className="mb-0")
                ]),
                dbc.CardBody([
                    dbc.Row([
                        _state_control_col("Color Scale:", dcc.Dropdown(
                            id='color-scale-dropdown',
                            options=[
                                {'label': 'Blues (Reverse)', 'value': 'Blues_r'},
                                {'label': 'Reds', 'value': 'Reds'},
                                {'label': 'Viridis', 'value': 'Viridis'},
                                {'label': 'Plasma', 'value': 'Plasma'},
                                {'label': 'RdYlBu (Reverse)', 'value': 'RdYlBu_r'},
                                {'label': 'Spectral', 'value': 'Spectral'}
                            ],
                            value='Blues_r',
                            clearable=False
                        )),
                        _state_control_col("Show State Names:", dbc.Switch(
                            id="show-names-switch",
                            label="Display Names on Map",
                            value=True,
                        )),
                        _state_control_col("Text Size:", dcc.Slider(
                            id='text-size-slider',
                            min=8, max=16, step=1, value=11,
                            marks={i: str(i) for i in range(8, 17, 2)},
                            tooltip={"placement": "bottom", "always_visible": True}
                        )),
                        _state_control_col("Name Display:", dcc.Dropdown(
                            id='name-type-dropdown',
                            options=[
                                {'label': 'State Codes (TX, CA)', 'value': 'code'},
                                {'label': 'Full Names (Texas, California)', 'value': 'full'},
                                {'label': 'Code + Rate (TX: 2.5%)', 'value': 'code_rate'},
                                {'label': 'Full + Rate (Texas: 2.5%)', 'value': 'full_rate'}
                            ],
                            value='code_rate',
                            clearable=False
                        ))
                    ])
                ])
            ])
        ], width=12)
    ], className="mb-4"),

    # KPI cards (static: computed from fraud_by_state when the layout is built)
    dbc.Row([
        _state_kpi_col("bi-flag", "States Analyzed",
                       f"{len(fraud_by_state)}", "text-primary"),
        _state_kpi_col("bi-exclamation-triangle", "Highest Risk State",
                       f"{fraud_by_state.loc[fraud_by_state['fraud_rate'].idxmax(), 'state']}", "text-danger"),
        _state_kpi_col("bi-percent", "Max Fraud Rate",
                       f"{fraud_by_state['fraud_rate'].max():.2f}%", "text-warning"),
        _state_kpi_col("bi-graph-down", "Safest State",
                       f"{fraud_by_state.loc[fraud_by_state['fraud_rate'].idxmin(), 'state']}", "text-success")
    ], className="mb-4"),

    # Main map
    dbc.Row([
        _state_panel_col(
            "bi-map", "Interactive State Fraud Rate Map",
            dcc.Loading(
                dcc.Graph(id='enhanced-choropleth-map', style={'height': '700px'}),
                type="circle", color="#2E86AB"
            ),
            width=12, heading_class="mb-0 text-center"
        )
    ], className="mb-4"),

    # Ranking lists filled by the callback
    dbc.Row([
        _state_panel_col("bi-list-ol", "Top 10 Highest Risk States",
                         html.Div(id="top-risk-states"), width=6),
        _state_panel_col("bi-shield-check", "Top 10 Safest States",
                         html.Div(id="safest-states"), width=6)
    ], className="mb-4"),

    # Insights panel filled by the callback
    dbc.Row([
        _state_panel_col("bi-lightbulb-fill", "State-Level Strategic Insights",
                         html.Div(id="state-insights"), width=12)
    ])

], fluid=True)

@app.callback(
    [
        Output('enhanced-choropleth-map', 'figure'),
        Output('top-risk-states', 'children'),
        Output('safest-states', 'children'),
        Output('state-insights', 'children'),
    ],
    [
        Input('color-scale-dropdown', 'value'),
        Input('show-names-switch', 'value'),
        Input('text-size-slider', 'value'),
        Input('name-type-dropdown', 'value'),
    ],
)
def update_map(color_scale, show_names, text_size, name_type):
    """Rebuild the state choropleth and its companion panels.

    Reads the module-level ``fraud_by_state`` and ``fraud_by_state_coords``
    frames (defined outside this block).

    Args:
        color_scale: Value of the colour-scale dropdown; forwarded to
            ``px.choropleth`` as ``color_continuous_scale``.
        show_names: Truthy to overlay a text label on each state.
        text_size: Font size used for the overlaid labels.
        name_type: ``'code'``, ``'full'``, ``'code_rate'``, or anything else
            (treated as "full name + rate") selecting the label content.

    Returns:
        A 4-tuple ``(figure, top_risk_list_group, safest_list_group,
        insights)`` matching the four declared outputs.
    """

    def _rate_to_risk_color(rate):
        # Colour bands for the "highest risk" panel.
        if rate > 5:
            return "danger"
        if rate > 3:
            return "warning"
        return "info"

    def _rate_to_safe_color(rate):
        # Colour bands for the "safest" panel.
        return "success" if rate < 2 else "info"

    def _as_list_group(states, pick_color):
        # One list item per state row: bold "STATE: rate%" plus a muted count.
        return dbc.ListGroup([
            dbc.ListGroupItem(
                [
                    html.Div([
                        html.Strong(f"{entry['state']}: {entry['fraud_rate']:.2f}%"),
                        html.Small(f" ({entry['count']:,} transactions)", className="text-muted"),
                    ])
                ],
                color=pick_color(entry['fraud_rate']),
            )
            for _, entry in states.iterrows()
        ])

    state_map = px.choropleth(
        fraud_by_state,
        locations='state',
        locationmode='USA-states',
        color='fraud_rate',
        color_continuous_scale=color_scale,
        scope='usa',
        title='Fraud Rate by State in the United States',
        labels={'fraud_rate': 'Fraud Rate (%)', 'state': 'State'},
        hover_data={'count': True},
    )

    if show_names:
        # Pick the base label column first, then optionally append the rate.
        # Columns are only touched when the chosen mode actually needs them.
        uses_code = name_type in ('code', 'code_rate')
        label_text = fraud_by_state_coords['state' if uses_code else 'state_name']
        if name_type not in ('code', 'full'):
            rate_text = fraud_by_state_coords['fraud_rate'].round(1).astype(str)
            label_text = label_text + '<br>' + rate_text + '%'

        label_trace = go.Scattergeo(
            lon=fraud_by_state_coords['lon'],
            lat=fraud_by_state_coords['lat'],
            text=label_text,
            mode='text',
            textfont={'size': text_size, 'color': 'black', 'family': 'Arial Black'},
            showlegend=False,
            hoverinfo='skip',  # labels are decoration; hover stays on the choropleth
        )
        state_map.add_trace(label_trace)

    state_map.update_layout(
        geo={
            'projection_type': 'albers usa',
            'showframe': False,
            'showcoastlines': True,
        },
        margin={"r": 0, "t": 60, "l": 0, "b": 0},
        title_x=0.5,
        title_font_size=20,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )

    riskiest_panel = _as_list_group(
        fraud_by_state.nlargest(10, 'fraud_rate'), _rate_to_risk_color
    )
    safest_panel = _as_list_group(
        fraud_by_state.nsmallest(10, 'fraud_rate'), _rate_to_safe_color
    )
    insight_components = generate_state_insights(fraud_by_state)

    return state_map, riskiest_panel, safest_panel, insight_components

def generate_state_insights(fraud_data, critical_threshold=5.0, urgent_threshold=10.0,
                            safe_threshold=2.0):
    """Build the alert list and decision card for the state-level insights panel.

    Args:
        fraud_data: DataFrame with a ``'fraud_rate'`` and a ``'count'`` column.
            NOTE(review): rows are presumably one per state and ``fraud_rate``
            is presumably already in percent (the messages append ``%``) --
            confirm against where ``fraud_by_state`` is built.
        critical_threshold: Rates strictly above this count as "critical" and
            define the "high-risk" transaction share. Defaults to 5.0.
        urgent_threshold: Rates strictly above this count as "urgent".
            Defaults to 10.0.
        safe_threshold: Rates at or below this count as "safe".
            Defaults to 2.0.

    Returns:
        A list of five ``dbc.Alert`` components followed by one ``dbc.Card``
        ("Executive Decision Framework").

    Notes:
        * "Critical" is not exclusive of "urgent": a state above the urgent
          threshold is counted in both figures.
        * When the total transaction count is zero (e.g. an empty frame) the
          weighted rate and risk exposure are reported as 0 instead of
          dividing by zero.
    """
    rates = fraud_data['fraud_rate']
    counts = fraud_data['count']

    high_risk_mask = rates > critical_threshold
    critical_states = int(high_risk_mask.sum())
    urgent_states = int((rates > urgent_threshold).sum())
    safe_states = int((rates <= safe_threshold).sum())

    total_transactions = counts.sum()
    high_risk_transactions = fraud_data.loc[high_risk_mask, 'count'].sum()

    if total_transactions > 0:
        # Volume-weighted mean so large states dominate the national figure.
        weighted_avg = (rates * counts).sum() / total_transactions
        high_risk_percentage = (high_risk_transactions / total_transactions) * 100
    else:
        # No transactions at all: avoid 0/0 (NaN or ZeroDivisionError).
        weighted_avg = 0.0
        high_risk_percentage = 0.0

    # Threshold values are formatted from the variables so the displayed
    # cut-offs can never drift from the ones actually applied above.
    insights = [
        dbc.Alert(f"📊 National weighted fraud rate: {weighted_avg:.2f}% (weighted by transaction volume)", color="primary"),
        dbc.Alert(f"⚠️ {critical_states} states above critical threshold (>{critical_threshold:.1f}%)", color="warning"),
        dbc.Alert(f"🚨 {urgent_states} states require immediate intervention (>{urgent_threshold:.1f}%)", color="danger"),
        dbc.Alert(f"✅ {safe_states} states in safe zone (≤{safe_threshold:.1f}%)", color="success"),
        dbc.Alert(f"📈 Risk exposure: {high_risk_percentage:.1f}% of total transactions in high-risk states", color="info"),
    ]

    # NOTE(review): apart from the interpolated counts/percentage, the bullet
    # text below (budget split, "3:1 ROI", ">5%") is static copy and is not
    # derived from fraud_data or from the threshold parameters.
    decision_card = dbc.Card([
        dbc.CardHeader([
            html.H6([
                html.I(className="bi bi-briefcase me-2"),
                "Executive Decision Framework"
            ], className="mb-0 text-primary")
        ]),
        dbc.CardBody([
            html.H6("🎯 Immediate Actions (Next 30 Days):", className="text-danger fw-bold"),
            html.Ul([
                html.Li(f"Deploy fraud specialists to {urgent_states} urgent states immediately"),
                html.Li(f"Implement enhanced monitoring for {critical_states} critical states"),
                html.Li("Allocate 60% of fraud prevention budget to top 5 highest-risk states")
            ], className="mb-3"),

            html.H6("📊 Resource Allocation Guidance:", className="text-warning fw-bold"),
            html.Ul([
                html.Li(f"High-risk states represent {high_risk_percentage:.1f}% of transaction volume - prioritize accordingly"),
                html.Li("Cost-benefit analysis shows 3:1 ROI when focusing on states >5% fraud rate"),
                html.Li("Consider regional fraud patterns for coordinated prevention strategies")
            ], className="mb-3"),

            html.H6("🚀 Strategic Recommendations:", className="text-success fw-bold"),
            html.Ul([
                html.Li("Implement state-specific fraud scoring models with local risk factors"),
                html.Li("Establish regional fraud intelligence sharing networks"),
                html.Li("Develop targeted customer education campaigns for high-risk markets"),
                html.Li("Consider regulatory partnerships in states with persistent fraud issues")
            ])
        ])
    ], color="light", className="mt-3")

    insights.append(decision_card)

    return insights

# Launch the Dash server for this dashboard only when the code is executed as
# the main module (not when imported).
if __name__ == '__main__':
    # debug=True turns on Dash's development tooling; an explicit port is given.
    # NOTE(review): port 8059 is presumably chosen so this app does not collide
    # with the other Dash apps started elsewhere in the notebook -- confirm.
    app.run(debug=True, port=8059)