In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
from datetime import datetime, timedelta

# 1. Generate Sample Dataset
def generate_sample_data(n_samples=1000):
    """
    Generate synthetic supply chain data for FMCG company
    """
    np.random.seed(42)

    # Date range for the last year
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365)
    dates = [start_date + timedelta(days=x) for x in range((end_date - start_date).days)]

    # Randomly select dates (with replacement to simulate multiple events per day)
    selected_dates = np.random.choice(dates, n_samples)
    selected_dates = [date.strftime('%Y-%m-%d') for date in selected_dates]

    # Product categories
    categories = ['Beverages', 'Personal Care', 'Home Care', 'Food', 'Snacks']
    selected_categories = np.random.choice(categories, n_samples)

    # Regions
    regions = ['North', 'South', 'East', 'West', 'Central']
    selected_regions = np.random.choice(regions, n_samples)

    # Suppliers
    suppliers = ['Supplier A', 'Supplier B', 'Supplier C', 'Supplier D', 'Supplier E']
    selected_suppliers = np.random.choice(suppliers, n_samples)

    # Normal operational metrics
    order_quantities = np.random.normal(5000, 1000, n_samples)
    lead_times = np.random.normal(5, 1, n_samples)
    transport_times = np.random.normal(3, 0.5, n_samples)
    inventory_levels = np.random.normal(8000, 1500, n_samples)
    demand_forecast = np.random.normal(4800, 900, n_samples)
    production_capacity = np.random.normal(6000, 500, n_samples)

    # Introduce seasonal patterns
    seasonal_factor = np.sin(np.linspace(0, 2*np.pi, 365))
    seasonal_indices = [dates.index(datetime.strptime(date, '%Y-%m-%d').date())
                        if datetime.strptime(date, '%Y-%m-%d').date() in dates
                        else 0 for date in selected_dates]

    seasonal_effect = [seasonal_factor[i % len(seasonal_factor)] for i in seasonal_indices]

    # Ensure seasonal_effect has the same length as demand_forecast
    seasonal_effect = np.array(seasonal_effect)[:n_samples]

    demand_forecast = demand_forecast + seasonal_effect * 500

    # Introduce some anomalies (about 5% of the data)
    anomaly_indices = np.random.choice(n_samples, size=int(n_samples * 0.05), replace=False)

    # Supply disruption anomalies
    for idx in anomaly_indices[:len(anomaly_indices)//3]:
        lead_times[idx] *= np.random.uniform(2, 3)  # Significantly longer lead times
        inventory_levels[idx] *= np.random.uniform(0.2, 0.5)  # Much lower inventory

    # Demand spike anomalies
    for idx in anomaly_indices[len(anomaly_indices)//3:2*len(anomaly_indices)//3]:
        demand_forecast[idx] *= np.random.uniform(1.5, 2.5)  # Unexpected demand spikes
        inventory_levels[idx] *= np.random.uniform(0.3, 0.6)  # Lower inventory due to spikes

    # Production issues anomalies
    for idx in anomaly_indices[2*len(anomaly_indices)//3:]:
        production_capacity[idx] *= np.random.uniform(0.4, 0.7)  # Production capacity issues
        transport_times[idx] *= np.random.uniform(1.5, 2.5)  # Logistics delays

    # Weather impact (random severe weather days)
    weather_impact = np.zeros(n_samples)
    severe_weather_days = np.random.choice(n_samples, size=int(n_samples * 0.03), replace=False)
    weather_impact[severe_weather_days] = np.random.uniform(0.5, 1.0, size=len(severe_weather_days))

    # Create DataFrame
    df = pd.DataFrame({
        'date': selected_dates,
        'category': selected_categories,
        'region': selected_regions,
        'supplier': selected_suppliers,
        'order_quantity': order_quantities,
        'lead_time_days': lead_times,
        'transport_time_days': transport_times,
        'inventory_level': inventory_levels,
        'demand_forecast': demand_forecast,
        'production_capacity': production_capacity,
        'weather_impact': weather_impact,
        # Derived metrics
        'inventory_coverage_days': inventory_levels / (demand_forecast / 30),
        'capacity_utilization': demand_forecast / production_capacity,
    })

    # Add a few extreme outliers
    extreme_indices = np.random.choice(n_samples, size=10, replace=False)
    df.loc[extreme_indices, 'lead_time_days'] *= 5
    df.loc[extreme_indices, 'transport_time_days'] *= 4

    # Label the true anomalies for evaluation
    df['true_anomaly'] = 0
    df.loc[anomaly_indices, 'true_anomaly'] = 1
    df.loc[extreme_indices, 'true_anomaly'] = 1
    df.loc[severe_weather_days, 'true_anomaly'] = 1

    return df

# 2. Data Preprocessing
def preprocess_data(df):
    """
    Preprocess the supply chain data for anomaly detection
    """
    # Convert date to datetime
    df['date'] = pd.to_datetime(df['date'])

    # One-hot encode categorical variables
    df_processed = pd.get_dummies(df, columns=['category', 'region', 'supplier'])

    # Extract features for anomaly detection
    features = ['order_quantity', 'lead_time_days', 'transport_time_days',
                'inventory_level', 'demand_forecast', 'production_capacity',
                'weather_impact', 'inventory_coverage_days', 'capacity_utilization']

    # Scale the features
    scaler = StandardScaler()
    df_scaled = pd.DataFrame(
        scaler.fit_transform(df_processed[features]),
        columns=features
    )

    return df_scaled, features, scaler, df_processed

# 3. Implement Multiple Anomaly Detection Methods

# a. Isolation Forest
def isolation_forest_detection(X, contamination=0.05):
    """
    Detect anomalies using Isolation Forest
    """
    model = IsolationForest(
        n_estimators=100,
        max_samples='auto',
        contamination=contamination,
        random_state=42
    )

    # Fit and predict
    y_pred = model.fit_predict(X)

    # Convert predictions to binary (1 for normal, -1 for anomaly)
    # Convert to 0 for normal, 1 for anomaly to make it easier to understand
    anomalies = np.where(y_pred == -1, 1, 0)

    return anomalies, model

# b. DBSCAN Clustering
def dbscan_detection(X, eps=0.5, min_samples=5):
    """
    Detect anomalies using DBSCAN clustering
    """
    model = DBSCAN(eps=eps, min_samples=min_samples)
    clusters = model.fit_predict(X)

    # Points labeled as -1 are considered outliers in DBSCAN
    anomalies = np.where(clusters == -1, 1, 0)

    return anomalies, model

# 4. Ensemble Method
def ensemble_anomaly_detection(X):
    """
    Combine multiple anomaly detection methods
    """
    # Apply different methods
    isolation_anomalies, iso_model = isolation_forest_detection(X)

    # Apply PCA for dimensionality reduction before DBSCAN
    pca = PCA(n_components=0.95)  # Retain 95% of variance
    X_pca = pca.fit_transform(X)

    # Determine eps parameter adaptively
    from sklearn.neighbors import NearestNeighbors
    nn = NearestNeighbors(n_neighbors=2)
    nn.fit(X_pca)
    distances, _ = nn.kneighbors(X_pca)
    distances = np.sort(distances[:, 1])
    knee_point = np.argmax(distances[1:] - distances[:-1]) + 1
    eps = distances[knee_point]

    dbscan_anomalies, db_model = dbscan_detection(X_pca, eps=eps, min_samples=5)

    # Combine methods (a point is anomalous if either method flags it)
    ensemble_anomalies = np.logical_or(isolation_anomalies, dbscan_anomalies).astype(int)

    return {
        'isolation_forest': isolation_anomalies,
        'dbscan': dbscan_anomalies,
        'ensemble': ensemble_anomalies
    }, iso_model, db_model, pca

# 5. Root Cause Analysis
def identify_root_causes(df, anomaly_indices, features, scaler):
    """
    Identify potential root causes for detected anomalies
    """
    anomalous_data = df.iloc[anomaly_indices]
    normal_data = df.iloc[~np.isin(np.arange(len(df)), anomaly_indices)]

    # Calculate z-scores for each feature in anomalous data
    means = normal_data[features].mean()
    stds = normal_data[features].std()

    z_scores = (anomalous_data[features] - means) / stds

    # Identify significant deviations
    significant_features = {}
    for idx, row in z_scores.iterrows():
        significant_dev = []
        for feature, value in row.items():
            if abs(value) > 2:  # Z-score threshold
                direction = "high" if value > 0 else "low"
                significant_dev.append((feature, value, direction))

        # Sort by absolute z-score to find most significant deviations
        significant_dev.sort(key=lambda x: abs(x[1]), reverse=True)
        significant_features[idx] = significant_dev

    return significant_features

# 6. Visualization and Reporting
def visualize_anomalies(df, original_df, anomaly_results, features, pca):
    """
    Create visualizations for detected anomalies
    """
    # 1. PCA visualization of anomalies
    X_pca = pca.transform(df)

    plt.figure(figsize=(12, 10))

    # Create a subplot for each detection method
    methods = list(anomaly_results.keys())

    for i, method in enumerate(methods):
        plt.subplot(2, 2, i+1)
        plt.scatter(X_pca[:, 0], X_pca[:, 1], c=anomaly_results[method],
                   cmap='viridis', alpha=0.7)
        plt.title(f'Anomalies detected by {method.replace("_", " ").title()}')
        plt.xlabel('Principal Component 1')
        plt.ylabel('Principal Component 2')
        plt.colorbar(label='Anomaly (1) / Normal (0)')

    # Add a subplot for true anomalies
    plt.subplot(2, 2, 4)
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=original_df['true_anomaly'],
               cmap='viridis', alpha=0.7)
    plt.title('True Anomalies (for validation)')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.colorbar(label='Anomaly (1) / Normal (0)')

    plt.tight_layout()
    plt.savefig('anomaly_detection_pca.png')
    plt.close()

    # 2. Time series plot with highlighted anomalies
    plt.figure(figsize=(15, 10))

    for i, feature in enumerate(['lead_time_days', 'inventory_coverage_days',
                                'capacity_utilization', 'transport_time_days']):
        plt.subplot(2, 2, i+1)

        # Sort by date for time series
        sorted_indices = np.argsort(original_df['date'])
        dates = original_df['date'].iloc[sorted_indices]
        values = original_df[feature].iloc[sorted_indices]
        anomalies = anomaly_results['ensemble'][sorted_indices]

        plt.plot(dates, values, 'b-', alpha=0.6, label=feature)
        plt.scatter(dates[anomalies == 1], values[anomalies == 1],
                   color='red', label='Detected Anomalies')

        plt.title(f'Time Series of {feature.replace("_", " ").title()} with Anomalies')
        plt.xlabel('Date')
        plt.ylabel(feature.replace("_", " ").title())
        plt.xticks(rotation=45)
        plt.legend()

    plt.tight_layout()
    plt.savefig('anomaly_time_series.png')
    plt.close()

    # 3. Feature correlation matrix
    plt.figure(figsize=(12, 10))
    corr = original_df[features].corr()
    sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title('Feature Correlation Matrix')
    plt.tight_layout()
    plt.savefig('feature_correlation.png')
    plt.close()

    return

# 7. Main Function
def main():
    # Generate sample data
    print("Generating sample supply chain data...")
    df = generate_sample_data(n_samples=1000)

    # Save the raw dataset
    df.to_csv('supply_chain_data.csv', index=False)
    print(f"Raw dataset saved with {len(df)} records.")

    # Preprocess data
    print("Preprocessing data...")
    X_scaled, features, scaler, df_processed = preprocess_data(df)

    # Detect anomalies
    print("Detecting supply chain anomalies...")
    anomaly_results, iso_model, db_model, pca = ensemble_anomaly_detection(X_scaled)

    # Identify anomalies from ensemble method
    anomaly_indices = np.where(anomaly_results['ensemble'] == 1)[0]
    print(f"Detected {len(anomaly_indices)} anomalies out of {len(df)} records.")

    # Perform root cause analysis
    print("Analyzing root causes...")
    root_causes = identify_root_causes(df, anomaly_indices, features, scaler)

    # Output some example root causes
    print("\nExample Root Causes for Detected Anomalies:")
    for i, (idx, causes) in enumerate(list(root_causes.items())[:5]):
        print(f"\nAnomaly #{i+1} (Record #{idx}):")
        print(f"  Date: {df.iloc[idx]['date']}, Region: {df.iloc[idx]['region']}, Supplier: {df.iloc[idx]['supplier']}")
        print("  Potential Root Causes:")
        for feature, z_score, direction in causes[:3]:
            print(f"    - {feature.replace('_', ' ').title()} is abnormally {direction} (z-score: {z_score:.2f})")

    # Visualize results
    print("\nCreating visualizations...")
    visualize_anomalies(X_scaled, df, anomaly_results, features, pca)

    # Add anomaly flags to the original data
    df['anomaly_detected'] = anomaly_results['ensemble']
    df.to_csv('supply_chain_with_anomalies.csv', index=False)

    print("\nAnalysis complete! Files saved:")
    print("- supply_chain_data.csv: Original dataset")
    print("- supply_chain_with_anomalies.csv: Dataset with anomaly flags")
    print("- anomaly_detection_pca.png: PCA visualization of anomalies")
    print("- anomaly_time_series.png: Time series plots with highlighted anomalies")
    print("- feature_correlation.png: Correlation matrix of supply chain features")

    return df, anomaly_results, root_causes

if __name__ == "__main__":
    main()

Generating sample supply chain data...
Raw dataset saved with 1000 records.
Preprocessing data...
Detecting supply chain anomalies...
Detected 50 anomalies out of 1000 records.
Analyzing root causes...

Example Root Causes for Detected Anomalies:

Anomaly #1 (Record #3):
  Date: 2024-07-06 00:00:00, Region: North, Supplier: Supplier E
  Potential Root Causes:
    - Demand Forecast is abnormally high (z-score: 6.55)
    - Capacity Utilization is abnormally high (z-score: 5.04)
    - Inventory Level is abnormally low (z-score: -3.27)

Anomaly #2 (Record #13):
  Date: 2025-03-16 00:00:00, Region: East, Supplier: Supplier A
  Potential Root Causes:
    - Demand Forecast is abnormally high (z-score: 6.52)
    - Capacity Utilization is abnormally high (z-score: 4.45)
    - Inventory Level is abnormally low (z-score: -2.76)

Anomaly #3 (Record #24):
  Date: 2025-01-29 00:00:00, Region: South, Supplier: Supplier C
  Potential Root Causes:
    - Capacity Utilization is abnormally high (z-score:

In [None]:
pip install dash

Collecting dash
  Downloading dash-3.0.0-py3-none-any.whl.metadata (10 kB)
Collecting Flask<3.1,>=1.0.4 (from dash)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Collecting stringcase>=1.2.0 (from dash)
  Downloading stringcase-1.2.0.tar.gz (3.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading dash-3.0.0-py3-none-any.whl (8.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/8.0 MB[0m [31m50.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading flask-3.0.3-py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading werkzeug-3.0.6-py3-none-any.whl (227 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m228.0/228.0 kB[0m [31

In [7]:
import dash
from dash import dcc, html, Input, Output, dash_table
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Import our anomaly detection module
# In a real project, you would import from the previous file
# For hackathon purposes, let's assume we have the data and models ready
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN

# Sample data loading function (replace with your actual data)
def load_data():
    try:
        # Try to load the generated data from previous script
        df = pd.read_csv('supply_chain_with_anomalies.csv')
        df['date'] = pd.to_datetime(df['date'])
        return df
    except:
        # If file doesn't exist, generate new data using the function from previous script
        # This would import the generate_sample_data function from the previous file
        # For demo purposes, we'll include a simplified version
        np.random.seed(42)
        n_samples = 1000

        # Date range for the last year
        end_date = datetime.now()
        start_date = end_date - timedelta(days=365)
        dates = [start_date + timedelta(days=x) for x in range((end_date - start_date).days)]
        selected_dates = np.random.choice(dates, n_samples)
        selected_dates = [date.strftime('%Y-%m-%d') for date in selected_dates]

        categories = ['Beverages', 'Personal Care', 'Home Care', 'Food', 'Snacks']
        regions = ['North', 'South', 'East', 'West', 'Central']
        suppliers = ['Supplier A', 'Supplier B', 'Supplier C', 'Supplier D', 'Supplier E']

        df = pd.DataFrame({
            'date': pd.to_datetime(selected_dates),
            'category': np.random.choice(categories, n_samples),
            'region': np.random.choice(regions, n_samples),
            'supplier': np.random.choice(suppliers, n_samples),
            'order_quantity': np.random.normal(5000, 1000, n_samples),
            'lead_time_days': np.random.normal(5, 1, n_samples),
            'transport_time_days': np.random.normal(3, 0.5, n_samples),
            'inventory_level': np.random.normal(8000, 1500, n_samples),
            'demand_forecast': np.random.normal(4800, 900, n_samples),
            'production_capacity': np.random.normal(6000, 500, n_samples),
            'weather_impact': np.random.uniform(0, 0.3, n_samples),
        })

        # Create some derived metrics
        df['inventory_coverage_days'] = df['inventory_level'] / (df['demand_forecast'] / 30)
        df['capacity_utilization'] = df['demand_forecast'] / df['production_capacity']

        # Add random anomalies (5% of data)
        anomaly_indices = np.random.choice(n_samples, size=int(n_samples * 0.05), replace=False)
        df['anomaly_detected'] = 0
        df.loc[anomaly_indices, 'anomaly_detected'] = 1

        return df

# Initialize the Dash app
app = dash.Dash(__name__,
                meta_tags=[{"name": "viewport", "content": "width=device-width, initial-scale=1"}],
                title="Supply Chain Anomaly Detection")

# Load data
df = load_data()

# Define app layout
app.layout = html.Div([
    # Header
    html.Div([
        html.H1("FMCG Supply Chain Anomaly Detection",
                style={"margin-bottom": "0px", "color": "white"}),
        html.H4("Real-time Monitoring & Root Cause Analysis Dashboard",
                style={"margin-top": "0px", "color": "white"})
    ], style={"text-align": "center", "padding": "1rem", "background-color": "#2c3e50"}),

    # Filters row
    html.Div([
        html.Div([
            html.P("Date Range:"),
            dcc.DatePickerRange(
                id='date-range',
                min_date_allowed=df['date'].min().date(),
                max_date_allowed=df['date'].max().date(),
                start_date=df['date'].min().date(),
                end_date=df['date'].max().date()
            )
        ], style={"width": "25%", "display": "inline-block", "padding": "10px"}),

        html.Div([
            html.P("Region:"),
            dcc.Dropdown(
                id='region-filter',
                options=[{"label": r, "value": r} for r in sorted(df['region'].unique())],
                value=[],
                multi=True,
                placeholder="Select regions..."
            )
        ], style={"width": "20%", "display": "inline-block", "padding": "10px"}),

        html.Div([
            html.P("Category:"),
            dcc.Dropdown(
                id='category-filter',
                options=[{"label": c, "value": c} for c in sorted(df['category'].unique())],
                value=[],
                multi=True,
                placeholder="Select categories..."
            )
        ], style={"width": "20%", "display": "inline-block", "padding": "10px"}),

        html.Div([
            html.P("Supplier:"),
            dcc.Dropdown(
                id='supplier-filter',
                options=[{"label": s, "value": s} for s in sorted(df['supplier'].unique())],
                value=[],
                multi=True,
                placeholder="Select suppliers..."
            )
        ], style={"width": "20%", "display": "inline-block", "padding": "10px"}),

        html.Div([
            html.P("Show Anomalies Only:"),
            dcc.RadioItems(
                id='anomaly-filter',
                options=[
                    {'label': 'All Data', 'value': 'all'},
                    {'label': 'Anomalies Only', 'value': 'anomalies'}
                ],
                value='all',
                labelStyle={'display': 'inline-block', 'margin-right': '10px'}
            )
        ], style={"width": "15%", "display": "inline-block", "padding": "10px"})
    ], style={"background-color": "#f2f2f2", "padding": "10px", "margin": "10px 0px"}),

    # KPI Cards
    html.Div([
        html.Div([
            html.H4("Total Records", style={"text-align": "center"}),
            html.P(id="total-records", style={"text-align": "center", "font-size": "24px", "font-weight": "bold"})
        ], className="kpi-card"),

        html.Div([
            html.H4("Detected Anomalies", style={"text-align": "center"}),
            html.P(id="total-anomalies", style={"text-align": "center", "font-size": "24px", "font-weight": "bold", "color": "#e74c3c"})
        ], className="kpi-card"),

        html.Div([
            html.H4("Anomaly Rate", style={"text-align": "center"}),
            html.P(id="anomaly-rate", style={"text-align": "center", "font-size": "24px", "font-weight": "bold"})
        ], className="kpi-card"),

        html.Div([
            html.H4("Most Affected Region", style={"text-align": "center"}),
            html.P(id="most-affected-region", style={"text-align": "center", "font-size": "24px", "font-weight": "bold"})
        ], className="kpi-card"),
    ], style={"display": "flex", "justify-content": "space-between", "margin": "20px 0px"}),

    # Main charts row
    html.Div([
        # Left column - Time series
        html.Div([
            html.H3("Supply Chain Metrics Over Time", style={"text-align": "center"}),
            dcc.Dropdown(
                id='metric-selector',
                options=[
                    {'label': 'Lead Time (days)', 'value': 'lead_time_days'},
                    {'label': 'Transport Time (days)', 'value': 'transport_time_days'},
                    {'label': 'Inventory Coverage (days)', 'value': 'inventory_coverage_days'},
                    {'label': 'Capacity Utilization (%)', 'value': 'capacity_utilization'},
                    {'label': 'Demand Forecast', 'value': 'demand_forecast'},
                    {'label': 'Order Quantity', 'value': 'order_quantity'}
                ],
                value='lead_time_days',
                clearable=False
            ),
            dcc.Graph(id="time-series-chart")
        ], style={"width": "49%", "display": "inline-block", "vertical-align": "top"}),

        # Right column - Anomaly distribution
        html.Div([
            html.H3("Anomaly Distribution by Category", style={"text-align": "center"}),
            dcc.Graph(id="category-anomaly-chart")
        ], style={"width": "49%", "display": "inline-block", "vertical-align": "top"})
    ]),

    # Second charts row
    html.Div([
        # Left column - Geographic distribution
        html.Div([
            html.H3("Regional Anomaly Distribution", style={"text-align": "center"}),
            dcc.Graph(id="regional-chart")
        ], style={"width": "49%", "display": "inline-block", "vertical-align": "top"}),

        # Right column - Correlation matrix
        html.Div([
            html.H3("Feature Correlation Matrix", style={"text-align": "center"}),
            dcc.Graph(id="correlation-chart")
        ], style={"width": "49%", "display": "inline-block", "vertical-align": "top"})
    ]),

    # Anomaly table
    html.Div([
        html.H3("Detected Anomalies", style={"text-align": "center"}),
        dash_table.DataTable(
            id='anomaly-table',
            columns=[
                {"name": "Date", "id": "date"},
                {"name": "Region", "id": "region"},
                {"name": "Category", "id": "category"},
                {"name": "Supplier", "id": "supplier"},
                {"name": "Lead Time", "id": "lead_time_days"},
                {"name": "Transport Time", "id": "transport_time_days"},
                {"name": "Inventory Coverage", "id": "inventory_coverage_days"},
                {"name": "Capacity Utilization", "id": "capacity_utilization"},
                {"name": "Risk Score", "id": "risk_score"}
            ],
            style_table={'overflowX': 'auto'},
            style_cell={
                'textAlign': 'left',
                'padding': '5px',
                'minWidth': '100px'
            },
            style_header={
                'backgroundColor': '#2c3e50',
                'color': 'white',
                'fontWeight': 'bold'
            },
            style_data_conditional=[
                {
                    'if': {'column_id': 'risk_score', 'filter_query': '{risk_score} > 80'},
                    'backgroundColor': '#e74c3c',
                    'color': 'white'
                },
                {
                    'if': {'column_id': 'risk_score', 'filter_query': '{risk_score} > 50 && {risk_score} <= 80'},
                    'backgroundColor': '#f39c12',
                    'color': 'white'
                },
                {
                    'if': {'column_id': 'risk_score', 'filter_query': '{risk_score} <= 50'},
                    'backgroundColor': '#27ae60',
                    'color': 'white'
                }
            ],
            page_size=10
        )
    ], style={"margin": "20px 0px"}),

    # Footer
    html.Div([
        html.P("FMCG Supply Chain Anomaly Detection Dashboard | Hackathon Project",
               style={"margin-bottom": "0px", "color": "white"})
    ], style={"text-align": "center", "padding": "1rem", "background-color": "#2c3e50", "margin-top": "20px"})
], style={"max-width": "1200px", "margin": "0 auto", "font-family": "Arial, sans-serif"})

# Add custom CSS
app.index_string = '''
<!DOCTYPE html>
<html>
    <head>
        {%metas%}
        <title>{%title%}</title>
        {%favicon%}
        {%css%}
        <style>
            .kpi-card {
                background-color: white;
                border-radius: 5px;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
                padding: 15px;
                width: 23%;
            }

            @media (max-width: 768px) {
                .kpi-card {
                    width: 48%;
                    margin-bottom: 10px;
                }
            }
        </style>
    </head>
    <body>
        {%app_entry%}
        <footer>
            {%config%}
            {%scripts%}
            {%renderer%}
        </footer>
    </body>
</html>
'''


# Define callback functions
@app.callback(
    [Output("total-records", "children"),
     Output("total-anomalies", "children"),
     Output("anomaly-rate", "children"),
     Output("most-affected-region", "children")],
    [Input("date-range", "start_date"),
     Input("date-range", "end_date"),
     Input("region-filter", "value"),
     Input("category-filter", "value"),
     Input("supplier-filter", "value"),
     Input("anomaly-filter", "value")]
)
def update_kpis(start_date, end_date, regions, categories, suppliers, anomaly_filter):
    # Filter data based on inputs
    filtered_df = df[
        (df['date'] >= start_date) &
        (df['date'] <= end_date)
    ]

    if regions:
        filtered_df = filtered_df[filtered_df['region'].isin(regions)]
    if categories:
        filtered_df = filtered_df[filtered_df['category'].isin(categories)]
    if suppliers:
        filtered_df = filtered_df[filtered_df['supplier'].isin(suppliers)]

    if anomaly_filter == 'anomalies':
        filtered_df = filtered_df[filtered_df['anomaly_detected'] == 1]

    # Calculate KPIs
    total_records = len(filtered_df)
    total_anomalies = filtered_df['anomaly_detected'].sum()
    anomaly_rate = f"{(total_anomalies / total_records * 100):.2f}%" if total_records > 0 else "0.00%"

    # Most affected region
    if total_anomalies > 0:
        most_affected_region = filtered_df[filtered_df['anomaly_detected'] == 1]['region'].mode()[0]
    else:
        most_affected_region = "N/A"

    return total_records, total_anomalies, anomaly_rate, most_affected_region


@app.callback(
    Output("time-series-chart", "figure"),
    [Input("date-range", "start_date"),
     Input("date-range", "end_date"),
     Input("region-filter", "value"),
     Input("category-filter", "value"),
     Input("supplier-filter", "value"),
     Input("anomaly-filter", "value"),
     Input("metric-selector", "value")]
)
def update_time_series(start_date, end_date, regions, categories, suppliers, anomaly_filter, metric):
    # Filter data
    filtered_df = df[
        (df['date'] >= start_date) &
        (df['date'] <= end_date)
    ]

    if regions:
        filtered_df = filtered_df[filtered_df['region'].isin(regions)]
    if categories:
        filtered_df = filtered_df[filtered_df['category'].isin(categories)]
    if suppliers:
        filtered_df = filtered_df[filtered_df['supplier'].isin(suppliers)]

    if anomaly_filter == 'anomalies':
        filtered_df = filtered_df[filtered_df['anomaly_detected'] == 1]

    # Create time series chart
    fig = px.line(
        filtered_df,
        x="date",
        y=metric,
        title=f"{metric.replace('_', ' ').title()} Over Time",
        labels={"date": "Date", metric: metric.replace('_', ' ').title()}
    )

    # Highlight anomalies
    if anomaly_filter == 'all':
        anomalies_df = filtered_df[filtered_df['anomaly_detected'] == 1]
        fig.add_trace(
            go.Scatter(
                x=anomalies_df['date'],
                y=anomalies_df[metric],
                mode='markers',
                marker=dict(color='red', size=8),
                name='Anomalies'
            )
        )

    fig.update_layout(
        xaxis_title="Date",
        yaxis_title=metric.replace('_', ' ').title(),
        hovermode="x unified"
    )

    return fig


@app.callback(
    Output("category-anomaly-chart", "figure"),
    [Input("date-range", "start_date"),
     Input("date-range", "end_date"),
     Input("region-filter", "value"),
     Input("category-filter", "value"),
     Input("supplier-filter", "value"),
     Input("anomaly-filter", "value")]
)
def update_category_anomaly_chart(start_date, end_date, regions, categories, suppliers, anomaly_filter):
    # Filter data
    filtered_df = df[
        (df['date'] >= start_date) &
        (df['date'] <= end_date)
    ]

    if regions:
        filtered_df = filtered_df[filtered_df['region'].isin(regions)]
    if categories:
        filtered_df = filtered_df[filtered_df['category'].isin(categories)]
    if suppliers:
        filtered_df = filtered_df[filtered_df['supplier'].isin(suppliers)]

    if anomaly_filter == 'anomalies':
        filtered_df = filtered_df[filtered_df['anomaly_detected'] == 1]

    # Group by category and count anomalies
    category_anomalies = filtered_df.groupby('category')['anomaly_detected'].sum().reset_index()

    # Create bar chart
    fig = px.bar(
        category_anomalies,
        x="category",
        y="anomaly_detected",
        title="Anomalies by Category",
        labels={"category": "Category", "anomaly_detected": "Number of Anomalies"}
    )

    fig.update_layout(
        xaxis_title="Category",
        yaxis_title="Number of Anomalies"
    )

    return fig


@app.callback(
    Output("regional-chart", "figure"),
    [Input("date-range", "start_date"),
     Input("date-range", "end_date"),
     Input("region-filter", "value"),
     Input("category-filter", "value"),
     Input("supplier-filter", "value"),
     Input("anomaly-filter", "value")]
)
def update_regional_chart(start_date, end_date, regions, categories, suppliers, anomaly_filter):
    # Filter data
    filtered_df = df[
        (df['date'] >= start_date) &
        (df['date'] <= end_date)
    ]

    if regions:
        filtered_df = filtered_df[filtered_df['region'].isin(regions)]
    if categories:
        filtered_df = filtered_df[filtered_df['category'].isin(categories)]
    if suppliers:
        filtered_df = filtered_df[filtered_df['supplier'].isin(suppliers)]

    if anomaly_filter == 'anomalies':
        filtered_df = filtered_df[filtered_df['anomaly_detected'] == 1]

    # Group by region and count anomalies
    regional_anomalies = filtered_df.groupby('region')['anomaly_detected'].sum().reset_index()

    # Create bar chart
    fig = px.bar(
        regional_anomalies,
        x="region",
        y="anomaly_detected",
        title="Anomalies by Region",
        labels={"region": "Region", "anomaly_detected": "Number of Anomalies"}
    )

    fig.update_layout(
        xaxis_title="Region",
        yaxis_title="Number of Anomalies"
    )

    return fig


@app.callback(
    Output("correlation-chart", "figure"),
    [Input("date-range", "start_date"),
     Input("date-range", "end_date"),
     Input("region-filter", "value"),
     Input("category-filter", "value"),
     Input("supplier-filter", "value"),
     Input("anomaly-filter", "value")]
)
def update_correlation_chart(start_date, end_date, regions, categories, suppliers, anomaly_filter):
    # Filter data
    filtered_df = df[
        (df['date'] >= start_date) &
        (df['date'] <= end_date)
    ]

    if regions:
        filtered_df = filtered_df[filtered_df['region'].isin(regions)]
    if categories:
        filtered_df = filtered_df[filtered_df['category'].isin(categories)]
    if suppliers:
        filtered_df = filtered_df[filtered_df['supplier'].isin(suppliers)]

    if anomaly_filter == 'anomalies':
        filtered_df = filtered_df[filtered_df['anomaly_detected'] == 1]

    # Select numerical features for correlation
    numerical_features = [
        'order_quantity', 'lead_time_days', 'transport_time_days',
        'inventory_level', 'demand_forecast', 'production_capacity',
        'inventory_coverage_days', 'capacity_utilization'
    ]

    corr = filtered_df[numerical_features].corr()

    # Create heatmap
    fig = go.Figure(
        data=go.Heatmap(
            z=corr.values,
            x=corr.columns,
            y=corr.columns,
            colorscale='Viridis',
            zmin=-1,
            zmax=1
        )
    )

    fig.update_layout(
        title="Feature Correlation Matrix",
        xaxis_title="Features",
        yaxis_title="Features"
    )

    return fig


@app.callback(
    Output("anomaly-table", "data"),
    [Input("date-range", "start_date"),
     Input("date-range", "end_date"),
     Input("region-filter", "value"),
     Input("category-filter", "value"),
     Input("supplier-filter", "value"),
     Input("anomaly-filter", "value")]
)
def update_anomaly_table(start_date, end_date, regions, categories, suppliers, anomaly_filter):
    # Filter data
    filtered_df = df[
        (df['date'] >= start_date) &
        (df['date'] <= end_date)
    ]

    if regions:
        filtered_df = filtered_df[filtered_df['region'].isin(regions)]
    if categories:
        filtered_df = filtered_df[filtered_df['category'].isin(categories)]
    if suppliers:
        filtered_df = filtered_df[filtered_df['supplier'].isin(suppliers)]

    if anomaly_filter == 'anomalies':
        filtered_df = filtered_df[filtered_df['anomaly_detected'] == 1]

    # Add a risk score (for demo purposes)
    filtered_df['risk_score'] = np.random.randint(0, 100, size=len(filtered_df))

    # Prepare data for table
    table_data = filtered_df[[
        'date', 'region', 'category', 'supplier',
        'lead_time_days', 'transport_time_days',
        'inventory_coverage_days', 'capacity_utilization',
        'risk_score'
    ]].to_dict('records')

    return table_data


# Run the app
if __name__ == "__main__":
    app.server.run(debug=True)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
