In [2]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

np.random.seed(42)

num_records = 5000

banks = [
    "Citibank", "HSBC", "JPMorgan Chase", "Bank of America", "Standard Chartered",
    "Deutsche Bank", "BNP Paribas", "Barclays", "Credit Suisse", "UBS",
    "Emirates NBD", "Abu Dhabi Commercial Bank", "First Abu Dhabi Bank", "Dubai Islamic Bank",
    "Mashreq Bank", "Bank of China", "Industrial and Commercial Bank of China", "Wells Fargo",
    "Goldman Sachs", "Morgan Stanley", "Royal Bank of Canada", "Bank of Tokyo-Mitsubishi UFJ"
]

uae_banks = [
    "Emirates NBD", "Abu Dhabi Commercial Bank", "First Abu Dhabi Bank", 
    "Dubai Islamic Bank", "Mashreq Bank", "Commercial Bank of Dubai", 
    "Abu Dhabi Islamic Bank", "RAK Bank", "National Bank of Fujairah",
    "Sharjah Islamic Bank", "United Arab Bank", "National Bank of Umm Al Qaiwain"
]

currencies = ["USD", "EUR", "GBP", "AED", "JPY", "CHF", "CNY", "CAD", "AUD", "SGD"]
mt_types = ["MT103", "MT202", "MT700", "MT760", "MT799", "MT910", "MT940", "MT950"]
directions = ["Outgoing", "Incoming"]
categories = ["Trade Finance", "Corporate Payment", "Retail Payment", "Interbank Settlement", 
              "Treasury Operations", "Investment", "Loan Disbursement", "FX Settlement"]

regions = [
    "North America", "Europe", "Asia Pacific", "Middle East", "Africa", 
    "Latin America", "Caribbean", "Central Asia", "South Asia", "Southeast Asia"
]

def random_date(start_year=2015, end_year=2023):
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)
    time_between_dates = end_date - start_date
    days_between_dates = time_between_dates.days
    random_number_of_days = random.randrange(days_between_dates)
    return start_date + timedelta(days=random_number_of_days)

data = []
for i in range(num_records):
    # Determine if receiver is UAE-based (75% probability)
    is_uae_receiver = np.random.choice([True, False], p=[0.75, 0.25])

    sender = np.random.choice(banks)
    if is_uae_receiver:
        receiver = np.random.choice(uae_banks)
        receiver_region = "Middle East"  # UAE is in Middle East
    else:

        possible_receivers = [bank for bank in banks if bank != sender]
        receiver = np.random.choice(possible_receivers)
        receiver_region = np.random.choice([r for r in regions if r != "Middle East"])

    count = i + 1
    amount = round(np.random.lognormal(mean=10, sigma=1.5), 2)  
    currency = np.random.choice(currencies, p=[0.4, 0.2, 0.1, 0.15, 0.05, 0.02, 0.03, 0.02, 0.02, 0.01])  # USD most common
    mt = np.random.choice(mt_types)
    year = random_date().year
    direction = np.random.choice(directions)
    category = np.random.choice(categories)
    send_region = np.random.choice(regions)
    mx_mt = "MT"  # All are MT in this simulation

    # Add to data
    data.append([
        count, sender, receiver, amount, currency, mt, year, direction, 
        category, send_region, receiver_region, mx_mt
    ])

columns = ['Count', 'sender', 'receiver', 'amount', 'currency', 'Mt', 'year', 
           'direction', 'category', 'send region', 'receiver region', 'mx/mt']
df = pd.DataFrame(data, columns=columns)

for idx, row in df.iterrows():
    if row['currency'] == 'JPY':
        df.at[idx, 'amount'] = round(row['amount'] * 100)  
    elif row['currency'] == 'AED':
        df.at[idx, 'amount'] = round(row['amount'] * 3.67, 2)  
    elif row['currency'] == 'EUR':
        df.at[idx, 'amount'] = round(row['amount'] * 0.85, 2)
    elif row['currency'] == 'GBP':
        df.at[idx, 'amount'] = round(row['amount'] * 0.75, 2)

df

Unnamed: 0,Count,sender,receiver,amount,currency,Mt,year,direction,category,send region,receiver region,mx/mt
0,1,Mashreq Bank,United Arab Bank,38111.53,EUR,MT940,2021,Outgoing,Retail Payment,Central Asia,Middle East,MT
1,2,Barclays,RAK Bank,128668.30,GBP,MT799,2019,Incoming,FX Settlement,Latin America,Middle East,MT
2,3,Royal Bank of Canada,Emirates NBD,26914.61,EUR,MT103,2021,Outgoing,Retail Payment,Southeast Asia,Middle East,MT
3,4,Bank of China,First Abu Dhabi Bank,1248.98,USD,MT760,2020,Incoming,Trade Finance,Asia Pacific,Middle East,MT
4,5,BNP Paribas,Bank of Tokyo-Mitsubishi UFJ,6710.04,GBP,MT940,2019,Incoming,Loan Disbursement,Central Asia,Southeast Asia,MT
...,...,...,...,...,...,...,...,...,...,...,...,...
4995,4996,Citibank,First Abu Dhabi Bank,8047.40,GBP,MT799,2021,Outgoing,Investment,Central Asia,Middle East,MT
4996,4997,Emirates NBD,National Bank of Fujairah,4855.98,USD,MT103,2016,Outgoing,Loan Disbursement,Caribbean,Middle East,MT
4997,4998,Industrial and Commercial Bank of China,Mashreq Bank,12863.84,USD,MT103,2017,Incoming,Treasury Operations,Asia Pacific,Southeast Asia,MT
4998,4999,Bank of America,Abu Dhabi Islamic Bank,5341756.00,JPY,MT910,2015,Incoming,Investment,Latin America,Middle East,MT


In [3]:
status_choices = ["Completed", "Pending", "Failed", "Cancelled"]
status_probs = [0.85, 0.08, 0.05, 0.02]
df['transaction_status'] = np.random.choice(status_choices, size=len(df), p=status_probs)
payment_methods = ["Wire Transfer", "ACH", "SWIFT", "Check", "Cash", "Mobile Payment", "Credit Card"]
probabilities = [0.4, 0.25, 0.15, 0.05, 0.05, 0.05, 0.05]

df['payment_method'] = np.random.choice(payment_methods, size=len(df), p=probabilities)



In [4]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Define region coordinates (approximate centroids)
region_coordinates = {
    "North America": {"lat": 40.0, "lon": -100.0},
    "Europe": {"lat": 50.0, "lon": 10.0},
    "Asia Pacific": {"lat": 30.0, "lon": 120.0},
    "Middle East": {"lat": 27.0, "lon": 45.0},
    "Africa": {"lat": 5.0, "lon": 20.0},
    "Latin America": {"lat": -10.0, "lon": -60.0},
    "Caribbean": {"lat": 20.0, "lon": -75.0},
    "Central Asia": {"lat": 45.0, "lon": 65.0},
    "South Asia": {"lat": 20.0, "lon": 75.0},
    "Southeast Asia": {"lat": 10.0, "lon": 105.0}
}

# Function to prepare flow data between regions
def prepare_region_flow_data(df):
    # Group by sender and receiver regions
    grouped = df.groupby(['send region', 'receiver region'])['amount'].sum().reset_index()
    
    # Create a copy to avoid modifying the original
    flow_data = grouped.copy()
    
    # Add coordinates for source and target regions
    flow_data['source_lat'] = flow_data['send region'].apply(lambda x: region_coordinates[x]['lat'])
    flow_data['source_lon'] = flow_data['send region'].apply(lambda x: region_coordinates[x]['lon'])
    flow_data['target_lat'] = flow_data['receiver region'].apply(lambda x: region_coordinates[x]['lat'])
    flow_data['target_lon'] = flow_data['receiver region'].apply(lambda x: region_coordinates[x]['lon'])
    
    return flow_data

# Function to create choropleth map with flows
def create_region_flow_map(df):
    # Prepare flow data
    flow_data = prepare_region_flow_data(df)
    
    # Calculate total inflows and outflows for each region
    inflows = df.groupby('receiver region')['amount'].sum().reset_index()
    inflows.columns = ['region', 'inflow']
    
    outflows = df.groupby('send region')['amount'].sum().reset_index()
    outflows.columns = ['region', 'outflow']
    
    # Merge inflows and outflows
    region_flows = pd.merge(inflows, outflows, on='region', how='outer').fillna(0)
    
    # Calculate net flow (inflow - outflow)
    region_flows['net_flow'] = region_flows['inflow'] - region_flows['outflow']
    region_flows['total_volume'] = region_flows['inflow'] + region_flows['outflow']
    
    # Create coordinates dataframe for the choropleth
    region_coords = pd.DataFrame(region_coordinates).T.reset_index()
    region_coords.columns = ['region', 'lat', 'lon']
    
    # Merge coordinates with flow data
    region_data = pd.merge(region_flows, region_coords, on='region', how='left')
    
    # Normalize flow values for better visualization
    max_amount = flow_data['amount'].max()
    flow_data['normalized_amount'] = flow_data['amount'] / max_amount * 10
    
    # Create figure with map
    fig = go.Figure()
    
    # Add base map (with natural earth projection)
    fig.add_trace(go.Choropleth(
        locationmode='country names',
        z=[0],  # Placeholder, we'll use a scatter plot for the regions
        showscale=False,
        colorscale='Blues',
        marker_line_color='darkgray',
        marker_line_width=0.5,
    ))
    
    # Add scatter points for regions (sized by total transaction volume)
    fig.add_trace(go.Scattergeo(
        lon=region_data['lon'],
        lat=region_data['lat'],
        text=region_data['region'] + '<br>Inflow: $' + region_data['inflow'].round(2).astype(str) + 
             '<br>Outflow: $' + region_data['outflow'].round(2).astype(str),
        mode='markers',
        marker=dict(
            size=region_data['total_volume'] / region_data['total_volume'].max() * 30 + 10,
            color=region_data['net_flow'],
            colorscale='RdBu',
            colorbar=dict(
                title='Net Flow<br>(Inflow - Outflow)',
                thickness=15
            ),
            cmid=0,  # Center colorscale at 0
            line=dict(width=1, color='black')
        ),
        name='Regions'
    ))
    
    # Add flow lines between regions
    for _, row in flow_data.iterrows():
        # Skip flows below threshold to avoid cluttering
        if row['normalized_amount'] < 0.5:
            continue
            
        # Create curved path for the flow lines
        fig.add_trace(go.Scattergeo(
            lon=[row['source_lon'], None, row['target_lon']],
            lat=[row['source_lat'], None, row['target_lat']],
            mode='lines',
            line=dict(
                width=row['normalized_amount'],
                color='rgba(80, 80, 220, 0.5)'
            ),
            opacity=0.7,
            hoverinfo='none',
            showlegend=False
        ))
    
    # Update layout
    fig.update_layout(
        title='Global Banking Transaction Flows Between Regions',
        geo=dict(
            projection_type='natural earth',
            showland=True,
            landcolor='rgb(243, 243, 243)',
            countrycolor='rgb(204, 204, 204)',
            showcountries=True,
            oceancolor='rgba(220, 230, 255, 1)',
            showocean=True,
            lakecolor='rgb(255, 255, 255)',
            showlakes=True,
            showcoastlines=True,
            coastlinecolor='rgb(80, 80, 80)',
            coastlinewidth=0.5
        ),
        width=1200,
        height=700,
        margin=dict(l=0, r=0, t=50, b=0)
    )
    
    return fig

# Alternative version using mapbox for more detailed map tiles
def create_mapbox_flow_map(df, mapbox_token=None):
    # Prepare flow data
    flow_data = prepare_region_flow_data(df)
    
    # Calculate total inflows and outflows for each region
    inflows = df.groupby('receiver region')['amount'].sum().reset_index()
    inflows.columns = ['region', 'inflow']
    
    outflows = df.groupby('send region')['amount'].sum().reset_index()
    outflows.columns = ['region', 'outflow']
    
    # Merge inflows and outflows
    region_flows = pd.merge(inflows, outflows, on='region', how='outer').fillna(0)
    
    # Calculate net flow (inflow - outflow)
    region_flows['net_flow'] = region_flows['inflow'] - region_flows['outflow']
    region_flows['total_volume'] = region_flows['inflow'] + region_flows['outflow']
    
    # Create coordinates dataframe for the regions
    region_coords = pd.DataFrame(region_coordinates).T.reset_index()
    region_coords.columns = ['region', 'lat', 'lon']
    
    # Merge coordinates with flow data
    region_data = pd.merge(region_flows, region_coords, on='region', how='left')
    
    # Normalize flow values for better visualization
    max_amount = flow_data['amount'].max()
    flow_data['normalized_amount'] = flow_data['amount'] / max_amount * 10
    
    # Create figure with mapbox background
    fig = go.Figure()
    
    # Add flows as custom shapes (arcs between regions)
    for _, row in flow_data.iterrows():
        # Skip flows below threshold to reduce clutter
        if row['normalized_amount'] < 0.5:
            continue
        
        # Calculate midpoint for the curve (with slight offset for visualization)
        lon_diff = row['target_lon'] - row['source_lon']
        lat_diff = row['target_lat'] - row['source_lat']
        
        # Adjust midpoint upward based on distance
        distance = np.sqrt(lon_diff**2 + lat_diff**2)
        midpoint_offset = min(distance * 0.15, 10)  # Cap the offset
        
        # Generate curve points
        curve_points = []
        steps = 20
        for i in range(steps + 1):
            t = i / steps
            # Parametric curve formula
            x = (1-t)**2 * row['source_lon'] + 2*(1-t)*t * ((row['source_lon'] + row['target_lon'])/2) + t**2 * row['target_lon']
            y = (1-t)**2 * row['source_lat'] + 2*(1-t)*t * ((row['source_lat'] + row['target_lat'])/2 + midpoint_offset) + t**2 * row['target_lat']
            curve_points.append((x, y))
        
        # Extract lons and lats from points
        lons, lats = zip(*curve_points)
        
        # Create curve as a line
        fig.add_trace(go.Scattermapbox(
            lon=lons,
            lat=lats,
            mode='lines',
            line=dict(
                width=row['normalized_amount'] * 1.5,
                color=f'rgba(70, 130, 180, {min(0.8, 0.3 + row["normalized_amount"] * 0.05)})'
            ),
            hoverinfo='text',
            hovertext=f"{row['send region']} → {row['receiver region']}<br>Amount: ${row['amount']:,.2f}",
            showlegend=False
        ))
    
    # Add regions as points
    fig.add_trace(go.Scattermapbox(
        lon=region_data['lon'],
        lat=region_data['lat'],
        text=region_data['region'],
        customdata=np.dstack((
            region_data['inflow'].round(2),
            region_data['outflow'].round(2),
            region_data['net_flow'].round(2)
        ))[0],
        hovertemplate='<b>%{text}</b><br>Inflow: $%{customdata[0]:,.2f}<br>Outflow: $%{customdata[1]:,.2f}<br>Net Flow: $%{customdata[2]:,.2f}',
        mode='markers',
        marker=dict(
            size=region_data['total_volume'] / region_data['total_volume'].max() * 25 + 10,
            color=region_data['net_flow'],
            colorscale='RdBu',
            colorbar=dict(
                title='Net Flow<br>(Inflow - Outflow)',
                thickness=15
            ),
            cmid=0,  # Center colorscale at 0
            opacity=0.8
        ),
        name='Regions'
    ))
    
    # Set mapbox style
    mapbox = dict(
        style='carto-positron',  # Use Carto base map (no token needed)
        zoom=1.2,
        center=dict(lat=20, lon=0)
    )
    
    # If mapbox token is provided, use mapbox style
    if mapbox_token:
        mapbox['accesstoken'] = mapbox_token
        mapbox['style'] = 'mapbox://styles/mapbox/light-v10'
    
    # Update layout
    fig.update_layout(
        title='Global Banking Transaction Flows Between Regions',
        mapbox=mapbox,
        width=1200,
        height=700,
        margin=dict(l=0, r=0, t=50, b=0)
    )
    
    return fig

# Example usage
def visualize_banking_flows_on_map(df):
    # Create version with natural earth projection
    import plotly.io as pio
    pio.renderers.default = 'browser'
    geo_fig = create_region_flow_map(df)
    geo_fig.write_html("banking_flows_map.html")
    
    # Create version with mapbox tiles (no token required for carto base maps)
    mapbox_fig = create_mapbox_flow_map(df)
    mapbox_fig.write_html("banking_flows_mapbox.html")
    
    print("Map visualizations created and saved as HTML files:")
    print("- banking_flows_map.html")
    print("- banking_flows_mapbox.html")
    
    # Return the mapbox figure as default
    return mapbox_fig

# Run the visualization with our dataframe
flow_map = visualize_banking_flows_on_map(df)
flow_map 


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



Map visualizations created and saved as HTML files:
- banking_flows_map.html
- banking_flows_mapbox.html


In [5]:
df.columns

Index(['Count', 'sender', 'receiver', 'amount', 'currency', 'Mt', 'year',
       'direction', 'category', 'send region', 'receiver region', 'mx/mt',
       'transaction_status', 'payment_method'],
      dtype='object')

In [6]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from ipywidgets import widgets, interactive, Layout
from IPython.display import display

# Region coordinates (approximate centroids)
region_coordinates = {
    "North America": {"lat": 40.0, "lon": -100.0},
    "Europe": {"lat": 50.0, "lon": 10.0},
    "Asia Pacific": {"lat": 30.0, "lon": 120.0},
    "Middle East": {"lat": 27.0, "lon": 45.0},
    "Africa": {"lat": 5.0, "lon": 20.0},
    "Latin America": {"lat": -10.0, "lon": -60.0},
    "Caribbean": {"lat": 20.0, "lon": -75.0},
    "Central Asia": {"lat": 45.0, "lon": 65.0},
    "South Asia": {"lat": 20.0, "lon": 75.0},
    "Southeast Asia": {"lat": 10.0, "lon": 105.0}
}

def prepare_region_flow_data(df):
    flow_data = df.groupby(['send region', 'receiver region'])['amount'].sum().reset_index()
    for coord in ['lat', 'lon']:
        flow_data[f'source_{coord}'] = flow_data['send region'].map(lambda x: region_coordinates.get(x, {}).get(coord))
        flow_data[f'target_{coord}'] = flow_data['receiver region'].map(lambda x: region_coordinates.get(x, {}).get(coord))
    return flow_data

def create_flow_map(df, mapbox_token=None):
    flow_data = prepare_region_flow_data(df)
    region_flows = pd.merge(
        df.groupby('receiver region')['amount'].sum().rename('inflow').reset_index(),
        df.groupby('send region')['amount'].sum().rename('outflow').reset_index(),
        left_on='receiver region', right_on='send region', how='outer'
    ).fillna(0).rename(columns={'receiver region': 'region'}).drop('send region', axis=1)
    
    region_flows['net_flow'] = region_flows['inflow'] - region_flows['outflow']
    region_flows['total_volume'] = region_flows['inflow'] + region_flows['outflow']
    region_data = pd.merge(region_flows, pd.DataFrame(region_coordinates).T.reset_index().rename(columns={'index': 'region'}), on='region')
    
    flow_data['normalized_amount'] = flow_data['amount'] / flow_data['amount'].max() * 10
    fig = go.Figure()
    
    # Add flow lines
    for idx, row in flow_data[flow_data['normalized_amount'] >= 0.5].iterrows():
        # Check for None or NaN values in coordinates
        if (pd.isna(row['source_lon']) or pd.isna(row['source_lat']) or 
            pd.isna(row['target_lon']) or pd.isna(row['target_lat'])):
            continue
            
        lon_diff, lat_diff = row['target_lon'] - row['source_lon'], row['target_lat'] - row['source_lat']
        midpoint_offset = min(np.sqrt(lon_diff**2 + lat_diff**2) * 0.15, 10)
        curve_points = [(row['source_lon'], row['source_lat'])] + [
            ((1-t)**2 * row['source_lon'] + 2*(1-t)*t * ((row['source_lon'] + row['target_lon'])/2) + t**2 * row['target_lon'],
             (1-t)**2 * row['source_lat'] + 2*(1-t)*t * ((row['source_lat'] + row['target_lat'])/2 + midpoint_offset) + t**2 * row['target_lat'])
            for t in np.linspace(0, 1, 20)[1:-1]
        ] + [(row['target_lon'], row['target_lat'])]
        
        lons, lats = zip(*curve_points)
        # Fix the color transparency value
        alpha = min(0.8, 0.3 + row["normalized_amount"] * 0.05)
        color = f'rgba(70, 130, 180, {alpha:.2f})'
        
        # Create a custom_data field for identifying the flow when clicked
        custom_data = [row['send region'], row['receiver region'], row['amount']]
        
        fig.add_trace(go.Scattermapbox(
            lon=lons, lat=lats, mode='lines',
            line=dict(width=row['normalized_amount']*1.5, color=color),
            hoverinfo='text', 
            hovertext=f"{row['send region']} → {row['receiver region']}<br>Amount: ${row['amount']:,.2f}",
            customdata=[custom_data] * len(lons),  # Repeat the custom data for each point
            showlegend=False
        ))
    
    # Add regions
    # Filter out any rows with NaN coordinates
    valid_region_data = region_data.dropna(subset=['lon', 'lat'])
    if not valid_region_data.empty:
        fig.add_trace(go.Scattermapbox(
            lon=valid_region_data['lon'], 
            lat=valid_region_data['lat'], 
            text=valid_region_data['region'],
            customdata=valid_region_data[['inflow', 'outflow', 'net_flow', 'region']].values,
            hovertemplate='<b>%{text}</b><br>Inflow: $%{customdata[0]:,.2f}<br>Outflow: $%{customdata[1]:,.2f}<br>Net Flow: $%{customdata[2]:,.2f}',
            mode='markers',
            marker=dict(
                size=valid_region_data['total_volume'] / valid_region_data['total_volume'].max() * 25 + 10,
                color=valid_region_data['net_flow'], 
                colorscale='RdBu', 
                cmid=0, 
                opacity=0.8,
                colorbar=dict(title='Net Flow<br>(Inflow - Outflow)', thickness=15)
            ),
            name='Regions'
        ))
    
    fig.update_layout(
        title='Global Banking Transaction Flows Between Regions',
        mapbox=dict(
            style='carto-positron',
            zoom=1.2,
            center=dict(lat=20, lon=0),
            **({'accesstoken': mapbox_token, 'style': 'mapbox://styles/mapbox/light-v10'} if mapbox_token else {})
        ),
        width=1200, height=700, margin=dict(l=0, r=0, t=50, b=0)
    )
    
    return fig

def create_sankey_for_regions(df, send_region=None, receive_region=None, year=None):
    """
    Create a Sankey diagram for transactions between specified regions,
    showing a flow from sender → receiver → currency → mx/mt → mt → direction → amount
    without showing the regions themselves as nodes.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame containing transaction data
    send_region : str, optional
        Region to filter senders by
    receive_region : str, optional
        Region to filter receivers by
    year : int, optional
        Year to filter transactions by
    
    Returns:
    --------
    plotly.graph_objects.Figure
        A Sankey diagram figure
    """
    import pandas as pd
    import numpy as np
    import plotly.graph_objects as go
    
    # Create a copy to avoid modifying the original
    filtered_df = df.copy()
    
    # Filter data if regions are specified
    title_parts = []
    
    if send_region:
        filtered_df = filtered_df[filtered_df['send region'] == send_region]
        title_parts.append(f"from {send_region}")
    
    if receive_region:
        filtered_df = filtered_df[filtered_df['receiver region'] == receive_region]
        title_parts.append(f"to {receive_region}")
    
    # Filter by year if specified
    if year and 'year' in filtered_df.columns:
        filtered_df = filtered_df[filtered_df['year'] == year]
        title_parts.append(f"in {year}")
    
    if title_parts:
        title = f"Transactions {' '.join(title_parts)}"
    else:
        title = "All Regional Transactions"
    
    # Ensure amount categories exist (create if missing)
    if 'amount_category' not in filtered_df.columns:
        filtered_df['amount_category'] = pd.cut(
            filtered_df['amount'], 
            bins=[0, 1e6, 5e6, 1e7, float('inf')], 
            labels=['< 1M', '1M-5M', '5M-10M', '> 10M']
        )
    
    # Check if we have data after all the filters
    if filtered_df.empty:
        fig = go.Figure()
        fig.update_layout(
            title_text=f"No data available for {title}",
            annotations=[dict(
                text="No transactions found for the selected criteria",
                xref="paper", yref="paper",
                x=0.5, y=0.5, showarrow=False,
                font=dict(size=20)
            )]
        )
        return fig
    
    # Make sure all columns are properly formatted as strings
    str_columns = ['sender', 'receiver', 'currency', 'mx/mt', 'Mt', 'direction', 'amount_category']
    for col in str_columns:
        if col in filtered_df.columns and filtered_df[col].dtype != 'str':
            filtered_df[col] = filtered_df[col].astype(str)
    
    # Create the flows according to the specified path
    # sender → receiver
    flow1 = filtered_df.groupby(['sender', 'receiver']).size().reset_index(name='count')
    
    # receiver → currency
    flow2 = filtered_df.groupby(['receiver', 'currency']).size().reset_index(name='count')
    
    # currency → mx/mt
    flow3 = filtered_df.groupby(['currency', 'mx/mt']).size().reset_index(name='count')
    
    # mx/mt → mt
    flow4 = filtered_df.groupby(['mx/mt', 'Mt']).size().reset_index(name='count')
    
    # mt → direction
    flow5 = filtered_df.groupby(['Mt', 'direction']).size().reset_index(name='count')
    
    # direction → amount
    flow6 = filtered_df.groupby(['direction', 'amount_category']).size().reset_index(name='count')
    
    # Get all unique labels
    labels = pd.unique(
        flow1['sender'].tolist() +
        flow1['receiver'].tolist() +
        flow2['currency'].tolist() +
        flow3['mx/mt'].tolist() +
        flow4['Mt'].tolist() +
        flow5['direction'].tolist() +
        flow6['amount_category'].astype(str).tolist()
    )
    
    # Map labels to indices
    label_to_index = {label: i for i, label in enumerate(labels)}
    
    # Prepare sources, targets, and values for links
    source, target, value = [], [], []
    link_labels = []
    
    # sender → receiver
    for _, row in flow1.iterrows():
        source.append(label_to_index[row['sender']])
        target.append(label_to_index[row['receiver']])
        value.append(row['count'])
        link_labels.append(f"{row['sender']} → {row['receiver']}<br>Count: {row['count']}")
    
    # receiver → currency
    for _, row in flow2.iterrows():
        source.append(label_to_index[row['receiver']])
        target.append(label_to_index[row['currency']])
        value.append(row['count'])
        link_labels.append(f"{row['receiver']} → {row['currency']}<br>Count: {row['count']}")
    
    # currency → mx/mt
    for _, row in flow3.iterrows():
        source.append(label_to_index[row['currency']])
        target.append(label_to_index[row['mx/mt']])
        value.append(row['count'])
        link_labels.append(f"{row['currency']} → {row['mx/mt']}<br>Count: {row['count']}")
    
    # mx/mt → mt
    for _, row in flow4.iterrows():
        source.append(label_to_index[row['mx/mt']])
        target.append(label_to_index[row['Mt']])
        value.append(row['count'])
        link_labels.append(f"{row['mx/mt']} → {row['Mt']}<br>Count: {row['count']}")
    
    # mt → direction
    for _, row in flow5.iterrows():
        source.append(label_to_index[row['Mt']])
        target.append(label_to_index[row['direction']])
        value.append(row['count'])
        link_labels.append(f"{row['Mt']} → {row['direction']}<br>Count: {row['count']}")
    
    # direction → amount_category
    for _, row in flow6.iterrows():
        source.append(label_to_index[row['direction']])
        target.append(label_to_index[str(row['amount_category'])])
        value.append(row['count'])
        link_labels.append(f"{row['direction']} → {row['amount_category']}<br>Count: {row['count']}")
    
    # Set up colors for nodes and links
    num_nodes = len(labels)
    node_colors = ["rgba(31, 119, 180, 0.8)"] * num_nodes  # Default blue color
    
    # Create color groups for different node types
    sender_idx = [label_to_index[label] for label in flow1['sender'].unique()]
    receiver_idx = [label_to_index[label] for label in flow1['receiver'].unique()]
    currency_idx = [label_to_index[label] for label in flow2['currency'].unique()]
    mxmt_idx = [label_to_index[label] for label in flow3['mx/mt'].unique()]
    mt_idx = [label_to_index[label] for label in flow4['Mt'].unique()]
    direction_idx = [label_to_index[label] for label in flow5['direction'].unique()]
    amount_idx = [label_to_index[str(label)] for label in flow6['amount_category'].unique()]
    
    # Assign different colors to each node type
    for idx in sender_idx:
        node_colors[idx] = "rgba(214, 39, 40, 0.8)"  # Red for senders
    for idx in receiver_idx:
        node_colors[idx] = "rgba(44, 160, 44, 0.8)"  # Green for receivers
    for idx in currency_idx:
        node_colors[idx] = "rgba(255, 127, 14, 0.8)"  # Orange for currency
    for idx in mxmt_idx:
        node_colors[idx] = "rgba(148, 103, 189, 0.8)"  # Purple for mx/mt
    for idx in mt_idx:
        node_colors[idx] = "rgba(140, 86, 75, 0.8)"   # Brown for Mt
    for idx in direction_idx:
        node_colors[idx] = "rgba(23, 190, 207, 0.8)"  # Cyan for directions
    for idx in amount_idx:
        node_colors[idx] = "rgba(31, 119, 180, 0.8)"  # Blue for amount categories
    
    # Create colors for links based on their source
    link_colors = []
    for s in source:
        # Use a slightly transparent version of the source node color
        base_color = node_colors[s].replace("0.8", "0.6")
        link_colors.append(base_color)
    
    # Create the Sankey diagram
    fig = go.Figure(data=[go.Sankey(
        arrangement="snap",
        node=dict(
            pad=20,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=list(labels),
            color=node_colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value,
            color=link_colors,
            customdata=np.array(link_labels),
            hovertemplate='%{customdata}<extra></extra>'
        )
    )])
    
    # Add annotations to describe each column
    fig.update_layout(
        title_text=title,
        font_size=12,
        height=600,
        width=1000,
        annotations=[
            dict(x=0.03, y=1, xref='paper', yref='paper', text='Sender', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.18, y=1, xref='paper', yref='paper', text='Receiver', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.32, y=1, xref='paper', yref='paper', text='Currency', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.47, y=1, xref='paper', yref='paper', text='MX/MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.62, y=1, xref='paper', yref='paper', text='MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.77, y=1, xref='paper', yref='paper', text='Direction', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.92, y=1, xref='paper', yref='paper', text='Amount', showarrow=False, font=dict(size=14, color='black')),
        ]
    )
    
    return fig

def visualize_banking_flows(df):
    # Ensure the expected columns exist
    required_columns = ['send region', 'receiver region', 'amount']
    missing_columns = [col for col in required_columns if col not in df.columns]
    
    if missing_columns:
        raise ValueError(f"Missing required columns in dataframe: {missing_columns}")
    
    # Check if year column exists, if not, provide a default
    if 'year' not in df.columns:
        print("Warning: 'year' column not found in data. Creating a dummy year column with value 2023.")
        df['year'] = 2023
    
    # Ensure all regions are in the coordinates dictionary
    unknown_send_regions = set(df['send region'].unique()) - set(region_coordinates.keys())
    unknown_recv_regions = set(df['receiver region'].unique()) - set(region_coordinates.keys())
    
    if unknown_send_regions:
        print(f"Warning: Unknown send regions: {unknown_send_regions}")
    if unknown_recv_regions:
        print(f"Warning: Unknown receiver regions: {unknown_recv_regions}")
    
    # Create the flow map
    flow_map = create_flow_map(df)
    
    # Get unique regions for dropdowns
    all_regions = sorted(list(set(df['send region'].unique()).union(set(df['receiver region'].unique()))))
    
    # Get unique years for the year dropdown
    years = sorted(df['year'].unique())
    
    # Create dropdown widgets
    send_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Sender Region:',
        layout=Layout(width='300px')
    )
    
    receive_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Receiver Region:',
        layout=Layout(width='300px')
    )
    
    # Create year dropdown
    year_dropdown = widgets.Dropdown(
        options=['All'] + list(years),
        value='All',
        description='Year:',
        layout=Layout(width='300px')
    )
    
    # Create output area for the Sankey diagram
    sankey_output = widgets.Output()
    
    def update_sankey(sender_region, receiver_region, year):
        with sankey_output:
            sankey_output.clear_output(wait=True)
            send_region = None if sender_region == 'All' else sender_region
            receive_region = None if receiver_region == 'All' else receiver_region
            selected_year = None if year == 'All' else year
            sankey_fig = create_sankey_for_regions(df, send_region, receive_region, selected_year)
            sankey_fig.show()
    
    # Create interactive widget with all three dropdowns
    interactive_widget = interactive(
        update_sankey,
        sender_region=send_dropdown,
        receiver_region=receive_dropdown,
        year=year_dropdown
    )
    
    # Display everything
    display(flow_map)
    display(interactive_widget)
    display(sankey_output)
    
    # Initial display
    update_sankey('All', 'All', 'All')
    
    return flow_map

# Example usage - comment these out if you're importing this as a module
flow_map = visualize_banking_flows(df)


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



interactive(children=(Dropdown(description='Sender Region:', layout=Layout(width='300px'), options=('All', np.…

Output()

In [6]:
df.columns

Index(['Count', 'sender', 'receiver', 'amount', 'currency', 'Mt', 'year',
       'direction', 'category', 'send region', 'receiver region', 'mx/mt',
       'transaction_status', 'payment_method'],
      dtype='object')

In [9]:
def create_sankey_for_regions(df, send_region=None, receive_region=None, year=None):
    """
    Create a Sankey diagram for transactions between specified regions,
    showing a flow from sender → receiver → currency → mx/mt → mt → direction → 
    transaction_status → payment_method → amount
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame containing transaction data
    send_region : str, optional
        Region to filter senders by
    receive_region : str, optional
        Region to filter receivers by
    year : int, optional
        Year to filter transactions by
    
    Returns:
    --------
    plotly.graph_objects.Figure
        A Sankey diagram figure
    """
    import pandas as pd
    import numpy as np
    import plotly.graph_objects as go
    
    # Create a copy to avoid modifying the original
    filtered_df = df.copy()
    
    # Filter data if regions are specified
    title_parts = []
    
    if send_region:
        filtered_df = filtered_df[filtered_df['send region'] == send_region]
        title_parts.append(f"from {send_region}")
    
    if receive_region:
        filtered_df = filtered_df[filtered_df['receiver region'] == receive_region]
        title_parts.append(f"to {receive_region}")
    
    # Filter by year if specified
    if year and 'year' in filtered_df.columns:
        filtered_df = filtered_df[filtered_df['year'] == year]
        title_parts.append(f"in {year}")
    
    if title_parts:
        title = f"Transactions {' '.join(title_parts)}"
    else:
        title = "All Regional Transactions"
    
    # Ensure amount categories exist (create if missing)
    if 'amount_category' not in filtered_df.columns:
        filtered_df['amount_category'] = pd.cut(
            filtered_df['amount'], 
            bins=[0, 1e6, 5e6, 1e7, float('inf')], 
            labels=['< 1M', '1M-5M', '5M-10M', '> 10M']
        )
    
    # Check if we have data after all the filters
    if filtered_df.empty:
        fig = go.Figure()
        fig.update_layout(
            title_text=f"No data available for {title}",
            annotations=[dict(
                text="No transactions found for the selected criteria",
                xref="paper", yref="paper",
                x=0.5, y=0.5, showarrow=False,
                font=dict(size=20)
            )]
        )
        return fig
    
    # Make sure all columns are properly formatted as strings
    str_columns = ['sender', 'receiver', 'currency', 'mx/mt', 'Mt', 'direction', 
                  'transaction_status', 'payment_method', 'amount_category']
    for col in str_columns:
        if col in filtered_df.columns and filtered_df[col].dtype != 'str':
            filtered_df[col] = filtered_df[col].astype(str)
    
    # Create the flows according to the specified path
    # sender → receiver
    flow1 = filtered_df.groupby(['sender', 'receiver']).size().reset_index(name='count')
    
    # receiver → currency
    flow2 = filtered_df.groupby(['receiver', 'currency']).size().reset_index(name='count')
    
    # currency → mx/mt
    flow3 = filtered_df.groupby(['currency', 'mx/mt']).size().reset_index(name='count')
    
    # mx/mt → mt
    flow4 = filtered_df.groupby(['mx/mt', 'Mt']).size().reset_index(name='count')
    
    # mt → direction
    flow5 = filtered_df.groupby(['Mt', 'direction']).size().reset_index(name='count')
    
    # direction → transaction_status (new flow)
    flow6 = filtered_df.groupby(['direction', 'transaction_status']).size().reset_index(name='count')
    
    # transaction_status → payment_method (new flow)
    flow7 = filtered_df.groupby(['transaction_status', 'payment_method']).size().reset_index(name='count')
    
    # payment_method → amount (new flow)
    flow8 = filtered_df.groupby(['payment_method', 'amount_category']).size().reset_index(name='count')
    
    # Get all unique labels
    labels = pd.unique(
        flow1['sender'].tolist() +
        flow1['receiver'].tolist() +
        flow2['currency'].tolist() +
        flow3['mx/mt'].tolist() +
        flow4['Mt'].tolist() +
        flow5['direction'].tolist() +
        flow6['transaction_status'].tolist() +
        flow7['payment_method'].tolist() +
        flow8['amount_category'].astype(str).tolist()
    )
    
    # Map labels to indices
    label_to_index = {label: i for i, label in enumerate(labels)}
    
    # Prepare sources, targets, and values for links
    source, target, value = [], [], []
    link_labels = []
    
    # sender → receiver
    for _, row in flow1.iterrows():
        source.append(label_to_index[row['sender']])
        target.append(label_to_index[row['receiver']])
        value.append(row['count'])
        link_labels.append(f"{row['sender']} → {row['receiver']}<br>Count: {row['count']}")
    
    # receiver → currency
    for _, row in flow2.iterrows():
        source.append(label_to_index[row['receiver']])
        target.append(label_to_index[row['currency']])
        value.append(row['count'])
        link_labels.append(f"{row['receiver']} → {row['currency']}<br>Count: {row['count']}")
    
    # currency → mx/mt
    for _, row in flow3.iterrows():
        source.append(label_to_index[row['currency']])
        target.append(label_to_index[row['mx/mt']])
        value.append(row['count'])
        link_labels.append(f"{row['currency']} → {row['mx/mt']}<br>Count: {row['count']}")
    
    # mx/mt → mt
    for _, row in flow4.iterrows():
        source.append(label_to_index[row['mx/mt']])
        target.append(label_to_index[row['Mt']])
        value.append(row['count'])
        link_labels.append(f"{row['mx/mt']} → {row['Mt']}<br>Count: {row['count']}")
    
    # mt → direction
    for _, row in flow5.iterrows():
        source.append(label_to_index[row['Mt']])
        target.append(label_to_index[row['direction']])
        value.append(row['count'])
        link_labels.append(f"{row['Mt']} → {row['direction']}<br>Count: {row['count']}")
    
    # direction → transaction_status (new flow)
    for _, row in flow6.iterrows():
        source.append(label_to_index[row['direction']])
        target.append(label_to_index[row['transaction_status']])
        value.append(row['count'])
        link_labels.append(f"{row['direction']} → {row['transaction_status']}<br>Count: {row['count']}")
    
    # transaction_status → payment_method (new flow)
    for _, row in flow7.iterrows():
        source.append(label_to_index[row['transaction_status']])
        target.append(label_to_index[row['payment_method']])
        value.append(row['count'])
        link_labels.append(f"{row['transaction_status']} → {row['payment_method']}<br>Count: {row['count']}")
    
    # payment_method → amount_category (new flow)
    for _, row in flow8.iterrows():
        source.append(label_to_index[row['payment_method']])
        target.append(label_to_index[str(row['amount_category'])])
        value.append(row['count'])
        link_labels.append(f"{row['payment_method']} → {row['amount_category']}<br>Count: {row['count']}")
    
    # Set up colors for nodes and links
    num_nodes = len(labels)
    node_colors = ["rgba(31, 119, 180, 0.8)"] * num_nodes  # Default blue color
    
    # Create color groups for different node types
    sender_idx = [label_to_index[label] for label in flow1['sender'].unique()]
    receiver_idx = [label_to_index[label] for label in flow1['receiver'].unique()]
    currency_idx = [label_to_index[label] for label in flow2['currency'].unique()]
    mxmt_idx = [label_to_index[label] for label in flow3['mx/mt'].unique()]
    mt_idx = [label_to_index[label] for label in flow4['Mt'].unique()]
    direction_idx = [label_to_index[label] for label in flow5['direction'].unique()]
    status_idx = [label_to_index[label] for label in flow6['transaction_status'].unique()]
    payment_idx = [label_to_index[label] for label in flow7['payment_method'].unique()]
    amount_idx = [label_to_index[str(label)] for label in flow8['amount_category'].unique()]
    
    # Assign different colors to each node type
    for idx in sender_idx:
        node_colors[idx] = "rgba(214, 39, 40, 0.8)"  # Red for senders
    for idx in receiver_idx:
        node_colors[idx] = "rgba(44, 160, 44, 0.8)"  # Green for receivers
    for idx in currency_idx:
        node_colors[idx] = "rgba(255, 127, 14, 0.8)"  # Orange for currency
    for idx in mxmt_idx:
        node_colors[idx] = "rgba(148, 103, 189, 0.8)"  # Purple for mx/mt
    for idx in mt_idx:
        node_colors[idx] = "rgba(140, 86, 75, 0.8)"   # Brown for Mt
    for idx in direction_idx:
        node_colors[idx] = "rgba(23, 190, 207, 0.8)"  # Cyan for directions
    for idx in status_idx:
        node_colors[idx] = "rgba(188, 189, 34, 0.8)"  # Yellow-green for transaction status
    for idx in payment_idx:
        node_colors[idx] = "rgba(127, 127, 127, 0.8)"  # Gray for payment method
    for idx in amount_idx:
        node_colors[idx] = "rgba(31, 119, 180, 0.8)"  # Blue for amount categories
    
    # Create colors for links based on their source
    link_colors = []
    for s in source:
        # Use a slightly transparent version of the source node color
        base_color = node_colors[s].replace("0.8", "0.6")
        link_colors.append(base_color)
    
    # Create the Sankey diagram
    fig = go.Figure(data=[go.Sankey(
        arrangement="snap",
        node=dict(
            pad=20,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=list(labels),
            color=node_colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value,
            color=link_colors,
            customdata=np.array(link_labels),
            hovertemplate='%{customdata}<extra></extra>'
        )
    )])
    
    # Add annotations to describe each column
    fig.update_layout(
        title_text=title,
        font_size=12,
        height=800,  # Increased height to accommodate more nodes
        width=1200,  # Increased width to accommodate more columns
        annotations=[
            dict(x=0.02, y=1, xref='paper', yref='paper', text='Sender', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.14, y=1, xref='paper', yref='paper', text='Receiver', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.25, y=1, xref='paper', yref='paper', text='Currency', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.37, y=1, xref='paper', yref='paper', text='MX/MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.48, y=1, xref='paper', yref='paper', text='MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.60, y=1, xref='paper', yref='paper', text='Direction', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.71, y=1, xref='paper', yref='paper', text='Status', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.83, y=1, xref='paper', yref='paper', text='Payment', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.94, y=1, xref='paper', yref='paper', text='Amount', showarrow=False, font=dict(size=14, color='black')),
        ]
    )
    
    return fig

def visualize_banking_flows(df):
    # Ensure the expected columns exist
    required_columns = ['send region', 'receiver region', 'amount', 'transaction_status', 'payment_method']
    missing_columns = [col for col in required_columns if col not in df.columns]
    
    if missing_columns:
        raise ValueError(f"Missing required columns in dataframe: {missing_columns}")
    
    # Check if year column exists, if not, provide a default
    if 'year' not in df.columns:
        print("Warning: 'year' column not found in data. Creating a dummy year column with value 2023.")
        df['year'] = 2023
    
    # Ensure all regions are in the coordinates dictionary
    unknown_send_regions = set(df['send region'].unique()) - set(region_coordinates.keys())
    unknown_recv_regions = set(df['receiver region'].unique()) - set(region_coordinates.keys())
    
    if unknown_send_regions:
        print(f"Warning: Unknown send regions: {unknown_send_regions}")
    if unknown_recv_regions:
        print(f"Warning: Unknown receiver regions: {unknown_recv_regions}")
    
    # Create the flow map
    flow_map = create_flow_map(df)
    
    # Get unique regions for dropdowns
    all_regions = sorted(list(set(df['send region'].unique()).union(set(df['receiver region'].unique()))))
    
    # Get unique years for the year dropdown
    years = sorted(df['year'].unique())
    
    # Create dropdown widgets
    send_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Sender Region:',
        layout=Layout(width='300px')
    )
    
    receive_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Receiver Region:',
        layout=Layout(width='300px')
    )
    
    # Create year dropdown
    year_dropdown = widgets.Dropdown(
        options=['All'] + list(years),
        value='All',
        description='Year:',
        layout=Layout(width='300px')
    )
    
    # Create output area for the Sankey diagram
    sankey_output = widgets.Output()
    
    def update_sankey(sender_region, receiver_region, year):
        with sankey_output:
            sankey_output.clear_output(wait=True)
            send_region = None if sender_region == 'All' else sender_region
            receive_region = None if receiver_region == 'All' else receiver_region
            selected_year = None if year == 'All' else year
            sankey_fig = create_sankey_for_regions(df, send_region, receive_region, selected_year)
            sankey_fig.show()
    
    # Create interactive widget with all three dropdowns
    interactive_widget = interactive(
        update_sankey,
        sender_region=send_dropdown,
        receiver_region=receive_dropdown,
        year=year_dropdown
    )
    
    # Display everything
    display(flow_map)
    display(interactive_widget)
    display(sankey_output)
    
    # Initial display
    update_sankey('All', 'All', 'All')
    
    return flow_map
visualize_banking_flows(df) 


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



interactive(children=(Dropdown(description='Sender Region:', layout=Layout(width='300px'), options=('All', np.…

Output()

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
# Set default renderer to notebook for Jupyter compatibility
pio.renderers.default = "notebook"

# Region coordinates (approximate centroids)
region_coordinates = {
    "North America": {"lat": 40.0, "lon": -100.0},
    "Europe": {"lat": 50.0, "lon": 10.0},
    "Asia Pacific": {"lat": 30.0, "lon": 120.0},
    "Middle East": {"lat": 27.0, "lon": 45.0},
    "Africa": {"lat": 5.0, "lon": 20.0},
    "Latin America": {"lat": -10.0, "lon": -60.0},
    "Caribbean": {"lat": 20.0, "lon": -75.0},
    "Central Asia": {"lat": 45.0, "lon": 65.0},
    "South Asia": {"lat": 20.0, "lon": 75.0},
    "Southeast Asia": {"lat": 10.0, "lon": 105.0}
}

def prepare_region_flow_data(df):
    flow_data = df.grouppy(['send region', 'receiver region'])['amount'].sum().reset_index()
    for coord in ['lat', 'lon']:
        flow_data[f'source_{coord}'] = flow_data['send region'].map(lambda x: region_coordinates.get(x, {}).get(coord))
        flow_data[f'target_{coord}'] = flow_data['receiver region'].map(lambda x: region_coordinates.get(x, {}).get(coord))
    return flow_data

def create_sankey_diagram(df, source_region=None, target_region=None):
    """
    Create a Sankey diagram for flows between regions
    If source_region and target_region are provided, filter to show only flows between these regions
    Otherwise, default to showing North America and Europe
    """
    # Default regions if none specified
    if source_region is None:
        source_region = "North America"
    if target_region is None:
        target_region = "Europe"
    
    # Filter data for the selected regions
    if source_region and target_region:
        filtered_df = df[(df['send region'] == source_region) & (df['receiver region'] == target_region) |
                         (df['send region'] == target_region) & (df['receiver region'] == source_region)]
    else:
        filtered_df = df
    
    # If no data found, return empty figure with message
    if filtered_df.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No flow data between selected regions",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=20)
        )
        return fig
    
    # Prepare node labels (unique regions involved)
    unique_regions = pd.unique(filtered_df[['send region', 'receiver region']].values.ravel('K'))
    region_to_idx = {region: i for i, region in enumerate(unique_regions)}
    
    # Prepare Sankey data
    sources = [region_to_idx[region] for region in filtered_df['send region']]
    targets = [region_to_idx[region] for region in filtered_df['receiver region']]
    values = filtered_df['amount'].tolist()
    
    # Create figure
    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=list(unique_regions),
            color="blue"
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values,
            # Add hover information
            hovertemplate='%{source.label} → %{target.label}<br>Amount: $%{value:,.2f}<extra></extra>'
        )
    )])
    
    # Update layout
    fig.update_layout(
        title_text=f"Banking Flows: {source_region} ↔ {target_region}",
        font_size=12,
        height=400,
        margin=dict(l=25, r=25, t=50, b=25)
    )
    
    return fig

def create_flow_map(df, mapbox_token=None):
    # Fix typo in function name: grouppy -> groupby
    flow_data = df.groupby(['send region', 'receiver region'])['amount'].sum().reset_index()
    for coord in ['lat', 'lon']:
        flow_data[f'source_{coord}'] = flow_data['send region'].map(lambda x: region_coordinates.get(x, {}).get(coord))
        flow_data[f'target_{coord}'] = flow_data['receiver region'].map(lambda x: region_coordinates.get(x, {}).get(coord))
    
    region_flows = pd.merge(
        df.groupby('receiver region')['amount'].sum().rename('inflow').reset_index(),
        df.groupby('send region')['amount'].sum().rename('outflow').reset_index(),
        left_on='receiver region', right_on='send region', how='outer'
    ).fillna(0).rename(columns={'receiver region': 'region'}).drop('send region', axis=1)
    
    region_flows['net_flow'] = region_flows['inflow'] - region_flows['outflow']
    region_flows['total_volume'] = region_flows['inflow'] + region_flows['outflow']
    region_data = pd.merge(region_flows, pd.DataFrame(region_coordinates).T.reset_index().rename(columns={'index': 'region'}), on='region')
    
    flow_data['normalized_amount'] = flow_data['amount'] / flow_data['amount'].max() * 10
    fig = go.Figure()
    
    # Add flow lines
    for idx, row in flow_data[flow_data['normalized_amount'] >= 0.5].iterrows():
        # Check for None or NaN values in coordinates
        if (pd.isna(row['source_lon']) or pd.isna(row['source_lat']) or 
            pd.isna(row['target_lon']) or pd.isna(row['target_lat'])):
            continue
            
        lon_diff, lat_diff = row['target_lon'] - row['source_lon'], row['target_lat'] - row['source_lat']
        midpoint_offset = min(np.sqrt(lon_diff**2 + lat_diff**2) * 0.15, 10)
        curve_points = [(row['source_lon'], row['source_lat'])] + [
            ((1-t)**2 * row['source_lon'] + 2*(1-t)*t * ((row['source_lon'] + row['target_lon'])/2) + t**2 * row['target_lon'],
             (1-t)**2 * row['source_lat'] + 2*(1-t)*t * ((row['source_lat'] + row['target_lat'])/2 + midpoint_offset) + t**2 * row['target_lat'])
            for t in np.linspace(0, 1, 20)[1:-1]
        ] + [(row['target_lon'], row['target_lat'])]
        
        lons, lats = zip(*curve_points)
        # Fix the color transparency value
        alpha = min(0.8, 0.3 + row["normalized_amount"] * 0.05)
        color = f'rgba(70, 130, 180, {alpha:.2f})'
        
        fig.add_trace(go.Scattermapbox(
            lon=lons, lat=lats, mode='lines',
            line=dict(width=row['normalized_amount']*1.5, color=color),
            hoverinfo='text', hovertext=f"{row['send region']} → {row['receiver region']}<br>Amount: ${row['amount']:,.2f}",
            showlegend=False,
            # Add custom data for click events - make sure the format is correct for callbacks
            customdata=[[{
                'source_region': row['send region'],
                'target_region': row['receiver region'],
                'flow_id': f"{row['send region']}_{row['receiver region']}"
            }]]
        ))
    
    # Add regions
    # Filter out any rows with NaN coordinates
    valid_region_data = region_data.dropna(subset=['lon', 'lat'])
    if not valid_region_data.empty:
        fig.add_trace(go.Scattermapbox(
            lon=valid_region_data['lon'], 
            lat=valid_region_data['lat'], 
            text=valid_region_data['region'],
            customdata=valid_region_data[['inflow', 'outflow', 'net_flow', 'region']].values,
            hovertemplate='<b>%{text}</b><br>Inflow: $%{customdata[0]:,.2f}<br>Outflow: $%{customdata[1]:,.2f}<br>Net Flow: $%{customdata[2]:,.2f}',
            mode='markers',
            marker=dict(
                size=valid_region_data['total_volume'] / valid_region_data['total_volume'].max() * 25 + 10,
                color=valid_region_data['net_flow'], 
                colorscale='RdBu', 
                cmid=0, 
                opacity=0.8,
                colorbar=dict(title='Net Flow<br>(Inflow - Outflow)', thickness=15)
            ),
            name='Regions'
        ))
    
    fig.update_layout(
        title='Global Banking Transaction Flows Between Regions',
        mapbox=dict(
            style='carto-positron',
            zoom=1.2,
            center=dict(lat=20, lon=0),
            **({'accesstoken': mapbox_token, 'style': 'mapbox://styles/mapbox/light-v10'} if mapbox_token else {})
        ),
        height=500, margin=dict(l=0, r=0, t=50, b=0)
    )
    return fig

def create_dashboard(df):
    """
    Create an interactive dashboard with a map and Sankey diagram
    """
    # Create subplots with 2 rows
    fig = make_subplots(
        rows=2, cols=1,
        specs=[[{"type": "mapbox"}], [{"type": "sankey"}]],
        row_heights=[0.6, 0.4],
        vertical_spacing=0.05,
        subplot_titles=("Global Banking Flow Map", "Banking Flow Detail - North America ↔ Europe")
    )
    
    # Create the map figure
    map_fig = create_flow_map(df)
    
    # Create default Sankey diagram
    sankey_fig = create_sankey_diagram(df, "North America", "Europe")
    
    # Add traces from map figure to subplot
    for trace in map_fig.data:
        fig.add_trace(trace, row=1, col=1)
    
    # Add Sankey diagram to second subplot
    for trace in sankey_fig.data:
        fig.add_trace(trace, row=2, col=1)
    
    # Update layout for both subplots
    fig.update_layout(
        height=900,
        width=1200,
        title_text="Interactive Banking Flows Dashboard",
        mapbox=dict(
            style='carto-positron',
            zoom=1.2,
            center=dict(lat=20, lon=0)
        ),
        margin=dict(l=0, r=0, t=50, b=0)
    )
    
    # Add annotation for instructions
    fig.add_annotation(
        xref='paper', yref='paper',
        x=0.5, y=0.47,
        text="Click on flow lines to update the Sankey diagram below",
        showarrow=False,
        font=dict(size=12)
    )
    
    # Add custom click event handling for Jupyter compatibility
    fig.update_layout(
        clickmode='event+select',
    )
    
    return fig

def visualize_banking_flows_with_sankey(df):
    """
    Create an interactive dashboard for Jupyter notebooks
    """
    # Ensure the expected columns exist
    required_columns = ['send region', 'receiver region', 'amount']
    missing_columns = [col for col in required_columns if col not in df.columns]
    
    if missing_columns:
        raise ValueError(f"Missing required columns in dataframe: {missing_columns}")
    
    # Ensure all regions are in the coordinates dictionary
    unknown_send_regions = set(df['send region'].unique()) - set(region_coordinates.keys())
    unknown_recv_regions = set(df['receiver region'].unique()) - set(region_coordinates.keys())
    
    if unknown_send_regions:
        print(f"Warning: Unknown send regions: {unknown_send_regions}")
    if unknown_recv_regions:
        print(f"Warning: Unknown receiver regions: {unknown_recv_regions}")
    
    # Create the dashboard figure
    dashboard_fig = create_dashboard(df)
    
    # Return the figure for display in the notebook
    return dashboard_fig

# Improved function to create interactive dashboard with click-to-update functionality
def create_interactive_dashboard(df=None):
    """
    Create an interactive dashboard that updates the Sankey diagram when flow lines are clicked
    
    Parameters:
    -----------
    df : pandas.DataFrame, optional
        DataFrame with banking flow data. If None, sample data will be generated.
    
    Returns:
    --------
    fig : plotly.graph_objects.Figure
        Interactive figure that can be displayed in a Jupyter notebook
    """
    # Generate sample data if not provided
    if df is None:
        # Generate sample data
        regions = ["North America", "Europe", "Asia Pacific", "Middle East", "Africa", "Latin America"]
        sample_data = []
        
        import random
        random.seed(42)  # For reproducible results
        
        for send_region in regions:
            for receiver_region in regions:
                if send_region != receiver_region:
                    # Generate a random amount (more flows between major regions)
                    amount_factor = 1.0
                    if send_region in ["North America", "Europe", "Asia Pacific"] and receiver_region in ["North America", "Europe", "Asia Pacific"]:
                        amount_factor = 3.0
                    
                    amount = random.uniform(1000, 10000) * amount_factor
                    sample_data.append({
                        "send region": send_region,
                        "receiver region": receiver_region,
                        "amount": amount
                    })
        
        # Create DataFrame
        df = pd.DataFrame(sample_data)
    
    # Create the dashboard figure
    fig = create_dashboard(df)
    
    # Add click callbacks with plotly.js
    # This is the key part that makes the Sankey diagram update when clicking flow lines
    fig.update_layout(
        updatemenus=[
            {
                "buttons": [],
                "direction": "left",
                "pad": {"r": 10, "t": 10},
                "showactive": False,
                "type": "buttons",
                "x": 0.1,
                "xanchor": "right",
                "y": 1.1,
                "yanchor": "top"
            }
        ]
    )
    
    # Add JavaScript code for interactivity
    fig.add_annotation(
        xref='paper', yref='paper', 
        x=0, y=0,
        text='',
        showarrow=False,
        hovertext="""
        <script>
        // Get the Plotly figure element
        var figDiv = document.querySelector('div[class*="plotly-graph-div"]');
        
        // Add click event listener to the figure
        figDiv.on('plotly_click', function(data) {
            // Check if the clicked trace is a flow line
            var traceType = data.points[0].data.mode;
            if (traceType === 'lines') {
                var customdata = data.points[0].customdata;
                
                if (customdata && customdata[0]) {
                    var sourceRegion = customdata[0].source_region;
                    var targetRegion = customdata[0].target_region;
                    
                    // Now update the Sankey diagram
                    // This requires server-side support to work properly
                    // In a Jupyter environment, we'd need to use callback functions
                    
                    // For now, let's at least update the subtitle
                    Plotly.relayout(figDiv, {
                        'annotations[1].text': 'Banking Flow Detail - ' + sourceRegion + ' ↔ ' + targetRegion
                    });
                    
                    // Ideally, we would also update the Sankey diagram itself
                }
            }
        });
        </script>
        """,
        visible=False
    )
    
    return fig

# For running with full interactivity
def create_interactive_notebook_dashboard(df=None):
    """
    Create a fully interactive dashboard in a Jupyter notebook
    This uses FigureWidget and ipywidgets for true interactivity
    
    Parameters:
    -----------
    df : pandas.DataFrame, optional
        DataFrame with banking flow data. If None, sample data will be generated.
    
    Returns:
    --------
    fig_widget : plotly.graph_objects.FigureWidget
        Interactive figure widget that can be displayed in a Jupyter notebook
    """
    try:
        from plotly.graph_objects import FigureWidget
        import ipywidgets as widgets
    except ImportError:
        print("To use this function, install ipywidgets: pip install ipywidgets")
        return None
    
    # Generate sample data if not provided
    if df is None:
        # Generate sample data
        regions = ["North America", "Europe", "Asia Pacific", "Middle East", "Africa", "Latin America"]
        sample_data = []
        
        import random
        random.seed(42)  # For reproducible results
        
        for send_region in regions:
            for receiver_region in regions:
                if send_region != receiver_region:
                    # Generate a random amount (more flows between major regions)
                    amount_factor = 1.0
                    if send_region in ["North America", "Europe", "Asia Pacific"] and receiver_region in ["North America", "Europe", "Asia Pacific"]:
                        amount_factor = 3.0
                    
                    amount = random.uniform(1000, 10000) * amount_factor
                    sample_data.append({
                        "send region": send_region,
                        "receiver region": receiver_region,
                        "amount": amount
                    })
        
        # Create DataFrame
        df = pd.DataFrame(sample_data)
    
    # Create the dashboard figure
    fig = create_dashboard(df)
    
    # Convert to FigureWidget for interactivity
    fig_widget = FigureWidget(fig)
    
    # Define click handler
    def handle_click(trace, points, selector):
        if not points.point_inds:
            return
            
        point_index = points.point_inds[0]
        curve_num = points.trace_index
        
        # Get the clicked trace
        trace = fig_widget.data[curve_num]
        
        # Check if it's a flow line with customdata
        if hasattr(trace, 'customdata') and trace.customdata is not None and len(trace.customdata) > point_index:
            # Get customdata from the clicked point
            customdata = trace.customdata[point_index]
            
            # For FigureWidget, we need to ensure customdata is in the right format
            if isinstance(customdata, list) and len(customdata) > 0:
                customdata = customdata[0]
            
            # Check if customdata has source_region and target_region
            if isinstance(customdata, dict) and 'source_region' in customdata and 'target_region' in customdata:
                source = customdata['source_region']
                target = customdata['target_region']
                
                # Update title
                for i, annotation in enumerate(fig_widget.layout.annotations):
                    if i == 1:  # The second annotation is the Sankey diagram title
                        annotation.text = f"Banking Flow Detail - {source} ↔ {target}"
                
                # Create new Sankey diagram
                new_sankey = create_sankey_diagram(df, source, target)
                
                # Find Sankey trace
                for i, trace in enumerate(fig_widget.data):
                    if trace.type == 'sankey':
                        # Update the Sankey trace
                        if len(new_sankey.data) > 0:
                            fig_widget.data[i].node = new_sankey.data[0].node
                            fig_widget.data[i].link = new_sankey.data[0].link
                        break
    
    # Add click handlers to all flow line traces
    for i, trace in enumerate(fig_widget.data):
        if hasattr(trace, 'mode') and trace.mode == 'lines':
            trace.on_click(handle_click)
    
    print("Interactive dashboard created. Click on flow lines to update the Sankey diagram.")
    return fig_widget

# Main function to run for demonstration
def main():
    """
    Main function to demonstrate usage
    """
    # Sample data creation
    regions = ["North America", "Europe", "Asia Pacific", "Middle East", "Africa", "Latin America"]
    sample_data = []
    
    
    # Generate some random flow data between regions
    import random
    random.seed(42)  # For reproducible results
    
    for send_region in regions:
        for receiver_region in regions:
            if send_region != receiver_region:
                # Generate a random amount (more flows between major regions)
                amount_factor = 1.0
                if send_region in ["North America", "Europe", "Asia Pacific"] and receiver_region in ["North America", "Europe", "Asia Pacific"]:
                    amount_factor = 3.0
                
                amount = random.uniform(1000, 10000) * amount_factor
                sample_data.append({
                    "send region": send_region,
                    "receiver region": receiver_region,
                    "amount": amount
                })
    
    # Create DataFrame
    df = pd.DataFrame(sample_data)
    
    # Try to use the interactive FigureWidget version first
    try:
        from plotly.graph_objects import FigureWidget
        print("Using interactive FigureWidget dashboard")
        dashboard_fig = create_interactive_notebook_dashboard(df)
    except ImportError:
        # Fall back to regular figure with custom JavaScript
        print("FigureWidget not available, using regular interactive dashboard")
        dashboard_fig = create_interactive_dashboard(df)
    
    return dashboard_fig

# For running in notebook with one cell - this is the main entry point
if __name__ == "__main__":
    # This will be executed when the cell is run in a notebook
    try:
        # Try importing FigureWidget for fully interactive version
        from plotly.graph_objects import FigureWidget
        import ipywidgets as widgets
        # Create the fully interactive dashboard
        fig = create_interactive_notebook_dashboard()
        display(fig)  # Show the interactive figure
    except ImportError:
        # Fall back to non-interactive version if FigureWidget not available
        print("FigureWidget not available, showing interactive dashboard with JavaScript callbacks")
        fig = create_interactive_dashboard()
        fig.show()


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



Interactive dashboard created. Click on flow lines to update the Sankey diagram.


FigureWidget({
    'data': [{'customdata': [[{'source_region': 'Africa', 'target_region': 'Asia
                             Pacific', 'flow_id': 'Africa_Asia Pacific'}]],
              'hoverinfo': 'text',
              'hovertext': 'Africa → Asia Pacific<br>Amount: $4,062.25',
              'lat': [5.0, 7.313019390581718, 9.515235457063712,
                      11.606648199445981, 13.58725761772853, 15.45706371191136,
                      17.21606648199446, 18.864265927977836, 20.401662049861493,
                      21.828254847645432, 23.144044321329638, 24.34903047091413,
                      25.44321329639889, 26.426592797783933, 27.29916897506925,
                      28.060941828254848, 28.711911357340718, 29.252077562326868,
                      29.681440443213297, 30.0],
              'line': {'color': 'rgba(70, 130, 180, 0.37)', 'width': 2.2494060652580856},
              'lon': [20.0, 25.263157894736842, 30.526315789473685,
                      35.78947368421052, 41.

ValueError: 
Invalid property path 'mapbox._derived' for layout
