In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

np.random.seed(42)

num_records = 500
banks = [
    "Citibank", "HSBC", "JPMorgan Chase", "Bank of America", "Standard Chartered",
    "Deutsche Bank", "BNP Paribas", "Barclays", "Credit Suisse", "UBS",
    "Emirates NBD", "Abu Dhabi Commercial Bank", "First Abu Dhabi Bank", "Dubai Islamic Bank",
    "Mashreq Bank", "Bank of China", "Industrial and Commercial Bank of China", "Wells Fargo",
    "Goldman Sachs", "Morgan Stanley", "Royal Bank of Canada", "Bank of Tokyo-Mitsubishi UFJ"
]

uae_banks = [
    "Emirates NBD", "Abu Dhabi Commercial Bank", "First Abu Dhabi Bank", 
    "Dubai Islamic Bank", "Mashreq Bank", "Commercial Bank of Dubai", 
    "Abu Dhabi Islamic Bank", "RAK Bank", "National Bank of Fujairah",
    "Sharjah Islamic Bank", "United Arab Bank", "National Bank of Umm Al Qaiwain"
]

currencies = ["USD", "EUR", "GBP", "AED", "JPY", "CHF", "CNY", "CAD", "AUD", "SGD"]
mt_types = ["MT103", "MT202", "MT700", "MT760", "MT799", "MT910", "MT940", "MT950"]
directions = ["Outgoing", "Incoming"]
categories = ["Trade Finance", "Corporate Payment", "Retail Payment", "Interbank Settlement", 
              "Treasury Operations", "Investment", "Loan Disbursement", "FX Settlement"]

regions = [
    "North America", "Europe", "Asia Pacific", "Middle East", "Africa", 
    "Latin America", "Caribbean", "Central Asia", "South Asia", "Southeast Asia"
]

def random_date(start_year=2015, end_year=2023):
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)
    time_between_dates = end_date - start_date
    days_between_dates = time_between_dates.days
    random_number_of_days = random.randrange(days_between_dates)
    return start_date + timedelta(days=random_number_of_days)

data = []
for i in range(num_records):
    # Determine if receiver is UAE-based (75% probability)
    is_uae_receiver = np.random.choice([True, False], p=[0.75, 0.25])

    sender = np.random.choice(banks)
    if is_uae_receiver:
        receiver = np.random.choice(uae_banks)
        receiver_region = "Middle East"  # UAE is in Middle East
    else:

        possible_receivers = [bank for bank in banks if bank != sender]
        receiver = np.random.choice(possible_receivers)
        receiver_region = np.random.choice([r for r in regions if r != "Middle East"])

    count = i + 1
    amount = round(np.random.lognormal(mean=10, sigma=1.5), 2)  
    currency = np.random.choice(currencies, p=[0.4, 0.2, 0.1, 0.15, 0.05, 0.02, 0.03, 0.02, 0.02, 0.01])  # USD most common
    mt = np.random.choice(mt_types)
    year = random_date().year
    direction = np.random.choice(directions)
    category = np.random.choice(categories)
    send_region = np.random.choice(regions)
    mx_mt = "MT"  # All are MT in this simulation

    # Add to data
    data.append([
        count, sender, receiver, amount, currency, mt, year, direction, 
        category, send_region, receiver_region, mx_mt
    ])

columns = ['Count', 'sender', 'receiver', 'amount', 'currency', 'Mt', 'year', 
           'direction', 'category', 'send region', 'receiver region', 'mx/mt']
df = pd.DataFrame(data, columns=columns)

for idx, row in df.iterrows():
    if row['currency'] == 'JPY':
        df.at[idx, 'amount'] = round(row['amount'] * 100)  
    elif row['currency'] == 'AED':
        df.at[idx, 'amount'] = round(row['amount'] * 3.67, 2)  
    elif row['currency'] == 'EUR':
        df.at[idx, 'amount'] = round(row['amount'] * 0.85, 2)
    elif row['currency'] == 'GBP':
        df.at[idx, 'amount'] = round(row['amount'] * 0.75, 2)

df

Unnamed: 0,Count,sender,receiver,amount,currency,Mt,year,direction,category,send region,receiver region,mx/mt
0,1,Mashreq Bank,United Arab Bank,38111.53,EUR,MT940,2015,Outgoing,Retail Payment,Central Asia,Middle East,MT
1,2,Barclays,RAK Bank,128668.30,GBP,MT799,2022,Incoming,FX Settlement,Latin America,Middle East,MT
2,3,Royal Bank of Canada,Emirates NBD,26914.61,EUR,MT103,2023,Outgoing,Retail Payment,Southeast Asia,Middle East,MT
3,4,Bank of China,First Abu Dhabi Bank,1248.98,USD,MT760,2017,Incoming,Trade Finance,Asia Pacific,Middle East,MT
4,5,BNP Paribas,Bank of Tokyo-Mitsubishi UFJ,6710.04,GBP,MT940,2016,Incoming,Loan Disbursement,Central Asia,Southeast Asia,MT
...,...,...,...,...,...,...,...,...,...,...,...,...
495,496,UBS,National Bank of Fujairah,14928.90,AED,MT799,2015,Incoming,Corporate Payment,South Asia,Middle East,MT
496,497,Bank of China,Abu Dhabi Commercial Bank,11987567.00,JPY,MT940,2018,Outgoing,Loan Disbursement,Caribbean,Middle East,MT
497,498,Industrial and Commercial Bank of China,Credit Suisse,61238.79,USD,MT700,2021,Outgoing,Corporate Payment,South Asia,South Asia,MT
498,499,Abu Dhabi Commercial Bank,Commercial Bank of Dubai,1165309.00,JPY,MT202,2018,Incoming,Loan Disbursement,Caribbean,Middle East,MT


In [None]:
    status_choices = ["Completed", "Pending", "Failed", "Cancelled"]
    status_probs = [0.85, 0.08, 0.05, 0.02]
    df['transaction_status'] = np.random.choice(status_choices, size=len(df), p=status_probs)
    payment_methods = ["Wire Transfer", "ACH", "SWIFT", "Check", "Cash", "Mobile Payment", "Credit Card"]
    probabilities = [0.4, 0.25, 0.15, 0.05, 0.05, 0.05, 0.05]

    df['payment_method'] = np.random.choice(payment_methods, size=len(df), p=probabilities)



In [3]:
df.columns

Index(['Count', 'sender', 'receiver', 'amount', 'currency', 'Mt', 'year',
       'direction', 'category', 'send region', 'receiver region', 'mx/mt',
       'transaction_status', 'payment_method'],
      dtype='object')

In [19]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from ipywidgets import widgets, interactive, Layout
from IPython.display import display

# Region coordinates (approximate centroids)
region_coordinates = {
    "North America": {"lat": 40.0, "lon": -100.0},
    "Europe": {"lat": 50.0, "lon": 10.0},
    "Asia Pacific": {"lat": 30.0, "lon": 120.0},
    "Middle East": {"lat": 27.0, "lon": 45.0},
    "Africa": {"lat": 5.0, "lon": 20.0},
    "Latin America": {"lat": -10.0, "lon": -60.0},
    "Caribbean": {"lat": 20.0, "lon": -75.0},
    "Central Asia": {"lat": 45.0, "lon": 65.0},
    "South Asia": {"lat": 20.0, "lon": 75.0},
    "Southeast Asia": {"lat": 10.0, "lon": 105.0}
}

def prepare_region_flow_data(df):
    flow_data = df.groupby(['send region', 'receiver region'])['amount'].sum().reset_index()
    for coord in ['lat', 'lon']:
        flow_data[f'source_{coord}'] = flow_data['send region'].map(lambda x: region_coordinates.get(x, {}).get(coord))
        flow_data[f'target_{coord}'] = flow_data['receiver region'].map(lambda x: region_coordinates.get(x, {}).get(coord))
    return flow_data

def create_flow_map(df, mapbox_token=None):
    flow_data = prepare_region_flow_data(df)
    region_flows = pd.merge(
        df.groupby('receiver region')['amount'].sum().rename('inflow').reset_index(),
        df.groupby('send region')['amount'].sum().rename('outflow').reset_index(),
        left_on='receiver region', right_on='send region', how='outer'
    ).fillna(0).rename(columns={'receiver region': 'region'}).drop('send region', axis=1)
    
    region_flows['net_flow'] = region_flows['inflow'] - region_flows['outflow']
    region_flows['total_volume'] = region_flows['inflow'] + region_flows['outflow']
    region_data = pd.merge(region_flows, pd.DataFrame(region_coordinates).T.reset_index().rename(columns={'index': 'region'}), on='region')
    
    flow_data['normalized_amount'] = flow_data['amount'] / flow_data['amount'].max() * 10
    fig = go.Figure()
    
    # Add flow lines
    for idx, row in flow_data[flow_data['normalized_amount'] >= 0.5].iterrows():
        # Check for None or NaN values in coordinates
        if (pd.isna(row['source_lon']) or pd.isna(row['source_lat']) or 
            pd.isna(row['target_lon']) or pd.isna(row['target_lat'])):
            continue
            
        lon_diff, lat_diff = row['target_lon'] - row['source_lon'], row['target_lat'] - row['source_lat']
        midpoint_offset = min(np.sqrt(lon_diff**2 + lat_diff**2) * 0.15, 10)
        curve_points = [(row['source_lon'], row['source_lat'])] + [
            ((1-t)**2 * row['source_lon'] + 2*(1-t)*t * ((row['source_lon'] + row['target_lon'])/2) + t**2 * row['target_lon'],
             (1-t)**2 * row['source_lat'] + 2*(1-t)*t * ((row['source_lat'] + row['target_lat'])/2 + midpoint_offset) + t**2 * row['target_lat'])
            for t in np.linspace(0, 1, 20)[1:-1]
        ] + [(row['target_lon'], row['target_lat'])]
        
        lons, lats = zip(*curve_points)
        # Fix the color transparency value
        alpha = min(0.8, 0.3 + row["normalized_amount"] * 0.05)
        color = f'rgba(70, 130, 180, {alpha:.2f})'
        
        # Create a custom_data field for identifying the flow when clicked
        custom_data = [row['send region'], row['receiver region'], row['amount']]
        
        fig.add_trace(go.Scattermapbox(
            lon=lons, lat=lats, mode='lines',
            line=dict(width=row['normalized_amount']*1.5, color=color),
            hoverinfo='text', 
            hovertext=f"{row['send region']} → {row['receiver region']}<br>Amount: ${row['amount']:,.2f}",
            customdata=[custom_data] * len(lons),  # Repeat the custom data for each point
            showlegend=False
        ))
    
    # Add regions
    # Filter out any rows with NaN coordinates
    valid_region_data = region_data.dropna(subset=['lon', 'lat'])
    if not valid_region_data.empty:
        fig.add_trace(go.Scattermapbox(
            lon=valid_region_data['lon'], 
            lat=valid_region_data['lat'], 
            text=valid_region_data['region'],
            customdata=valid_region_data[['inflow', 'outflow', 'net_flow', 'region']].values,
            hovertemplate='<b>%{text}</b><br>Inflow: $%{customdata[0]:,.2f}<br>Outflow: $%{customdata[1]:,.2f}<br>Net Flow: $%{customdata[2]:,.2f}',
            mode='markers',
            marker=dict(
                size=valid_region_data['total_volume'] / valid_region_data['total_volume'].max() * 25 + 10,
                color=valid_region_data['net_flow'], 
                colorscale='RdBu', 
                cmid=0, 
                opacity=0.8,
                colorbar=dict(title='Net Flow<br>(Inflow - Outflow)', thickness=15)
            ),
            name='Regions'
        ))
    
    fig.update_layout(
        title='Global Banking Transaction Flows Between Regions',
        mapbox=dict(
            style='carto-positron',
            zoom=1.2,
            center=dict(lat=20, lon=0),
            **({'accesstoken': mapbox_token, 'style': 'mapbox://styles/mapbox/light-v10'} if mapbox_token else {})
        ),
        width=1200, height=700, margin=dict(l=0, r=0, t=50, b=0)
    )
    
    return fig

def create_sankey_for_regions(df, send_region=None, receive_region=None, year=None):
    """
    Create a Sankey diagram for transactions between specified regions,
    showing a flow from sender → receiver → currency → mx/mt → mt → direction → amount
    without showing the regions themselves as nodes.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame containing transaction data
    send_region : str, optional
        Region to filter senders by
    receive_region : str, optional
        Region to filter receivers by
    year : int, optional
        Year to filter transactions by
    
    Returns:
    --------
    plotly.graph_objects.Figure
        A Sankey diagram figure
    """
    import pandas as pd
    import numpy as np
    import plotly.graph_objects as go
    
    # Create a copy to avoid modifying the original
    filtered_df = df.copy()
    
    # Filter data if regions are specified
    title_parts = []
    
    if send_region:
        filtered_df = filtered_df[filtered_df['send region'] == send_region]
        title_parts.append(f"from {send_region}")
    
    if receive_region:
        filtered_df = filtered_df[filtered_df['receiver region'] == receive_region]
        title_parts.append(f"to {receive_region}")
    
    # Filter by year if specified
    if year and 'year' in filtered_df.columns:
        filtered_df = filtered_df[filtered_df['year'] == year]
        title_parts.append(f"in {year}")
    
    if title_parts:
        title = f"Transactions {' '.join(title_parts)}"
    else:
        title = "All Regional Transactions"
    
    # Ensure amount categories exist (create if missing)
    if 'amount_category' not in filtered_df.columns:
        filtered_df['amount_category'] = pd.cut(
            filtered_df['amount'], 
            bins=[0, 1e6, 5e6, 1e7, float('inf')], 
            labels=['< 1M', '1M-5M', '5M-10M', '> 10M']
        )
    
    # Check if we have data after all the filters
    if filtered_df.empty:
        fig = go.Figure()
        fig.update_layout(
            title_text=f"No data available for {title}",
            annotations=[dict(
                text="No transactions found for the selected criteria",
                xref="paper", yref="paper",
                x=0.5, y=0.5, showarrow=False,
                font=dict(size=20)
            )]
        )
        return fig
    
    # Make sure all columns are properly formatted as strings
    str_columns = ['sender', 'receiver', 'currency', 'mx/mt', 'Mt', 'direction', 'amount_category']
    for col in str_columns:
        if col in filtered_df.columns and filtered_df[col].dtype != 'str':
            filtered_df[col] = filtered_df[col].astype(str)
    
    # Create the flows according to the specified path
    # sender → receiver
    flow1 = filtered_df.groupby(['sender', 'receiver']).size().reset_index(name='count')
    
    # receiver → currency
    flow2 = filtered_df.groupby(['receiver', 'currency']).size().reset_index(name='count')
    
    # currency → mx/mt
    flow3 = filtered_df.groupby(['currency', 'mx/mt']).size().reset_index(name='count')
    
    # mx/mt → mt
    flow4 = filtered_df.groupby(['mx/mt', 'Mt']).size().reset_index(name='count')
    
    # mt → direction
    flow5 = filtered_df.groupby(['Mt', 'direction']).size().reset_index(name='count')
    
    # direction → amount
    flow6 = filtered_df.groupby(['direction', 'amount_category']).size().reset_index(name='count')
    
    # Get all unique labels
    labels = pd.unique(
        flow1['sender'].tolist() +
        flow1['receiver'].tolist() +
        flow2['currency'].tolist() +
        flow3['mx/mt'].tolist() +
        flow4['Mt'].tolist() +
        flow5['direction'].tolist() +
        flow6['amount_category'].astype(str).tolist()
    )
    
    # Map labels to indices
    label_to_index = {label: i for i, label in enumerate(labels)}
    
    # Prepare sources, targets, and values for links
    source, target, value = [], [], []
    link_labels = []
    
    # sender → receiver
    for _, row in flow1.iterrows():
        source.append(label_to_index[row['sender']])
        target.append(label_to_index[row['receiver']])
        value.append(row['count'])
        link_labels.append(f"{row['sender']} → {row['receiver']}<br>Count: {row['count']}")
    
    # receiver → currency
    for _, row in flow2.iterrows():
        source.append(label_to_index[row['receiver']])
        target.append(label_to_index[row['currency']])
        value.append(row['count'])
        link_labels.append(f"{row['receiver']} → {row['currency']}<br>Count: {row['count']}")
    
    # currency → mx/mt
    for _, row in flow3.iterrows():
        source.append(label_to_index[row['currency']])
        target.append(label_to_index[row['mx/mt']])
        value.append(row['count'])
        link_labels.append(f"{row['currency']} → {row['mx/mt']}<br>Count: {row['count']}")
    
    # mx/mt → mt
    for _, row in flow4.iterrows():
        source.append(label_to_index[row['mx/mt']])
        target.append(label_to_index[row['Mt']])
        value.append(row['count'])
        link_labels.append(f"{row['mx/mt']} → {row['Mt']}<br>Count: {row['count']}")
    
    # mt → direction
    for _, row in flow5.iterrows():
        source.append(label_to_index[row['Mt']])
        target.append(label_to_index[row['direction']])
        value.append(row['count'])
        link_labels.append(f"{row['Mt']} → {row['direction']}<br>Count: {row['count']}")
    
    # direction → amount_category
    for _, row in flow6.iterrows():
        source.append(label_to_index[row['direction']])
        target.append(label_to_index[str(row['amount_category'])])
        value.append(row['count'])
        link_labels.append(f"{row['direction']} → {row['amount_category']}<br>Count: {row['count']}")
    
    # Set up colors for nodes and links
    num_nodes = len(labels)
    node_colors = ["rgba(31, 119, 180, 0.8)"] * num_nodes  # Default blue color
    
    # Create color groups for different node types
    sender_idx = [label_to_index[label] for label in pd.Series(flow1['sender']).unique()]
    receiver_idx = [label_to_index[label] for label in pd.Series(flow1['receiver']).unique()]
    currency_idx = [label_to_index[label] for label in pd.Series(flow2['currency']).unique()]
    mxmt_idx = [label_to_index[label] for label in pd.Series(flow3['mx/mt']).unique()]
    mt_idx = [label_to_index[label] for label in pd.Series(flow4['Mt']).unique()]
    direction_idx = [label_to_index[label] for label in pd.Series(flow5['direction']).unique()]
    amount_idx = [label_to_index[str(label)] for label in pd.Series(flow6['amount_category']).unique()]

    
    # Assign different colors to each node type
    for idx in sender_idx:
        node_colors[idx] = "rgba(214, 39, 40, 0.8)"  # Red for senders
    for idx in receiver_idx:
        node_colors[idx] = "rgba(44, 160, 44, 0.8)"  # Green for receivers
    for idx in currency_idx:
        node_colors[idx] = "rgba(255, 127, 14, 0.8)"  # Orange for currency
    for idx in mxmt_idx:
        node_colors[idx] = "rgba(148, 103, 189, 0.8)"  # Purple for mx/mt
    for idx in mt_idx:
        node_colors[idx] = "rgba(140, 86, 75, 0.8)"   # Brown for Mt
    for idx in direction_idx:
        node_colors[idx] = "rgba(23, 190, 207, 0.8)"  # Cyan for directions
    for idx in amount_idx:
        node_colors[idx] = "rgba(31, 119, 180, 0.8)"  # Blue for amount categories
    
    # Create colors for links based on their source
    link_colors = []
    for s in source:
        # Use a slightly transparent version of the source node color
        base_color = node_colors[s].replace("0.8", "0.6")
        link_colors.append(base_color)
    
    # Create the Sankey diagram
    fig = go.Figure(data=[go.Sankey(
        arrangement="snap",
        node=dict(
            pad=20,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=list(labels),
            color=node_colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value,
            color=link_colors,
            customdata=np.array(link_labels),
            hovertemplate='%{customdata}<extra></extra>'
        )
    )])
    
    # Add annotations to describe each column
    fig.update_layout(
        title_text=title,
        font_size=12,
        height=600,
        width=1000,
        annotations=[
            dict(x=0.03, y=1, xref='paper', yref='paper', text='Sender', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.18, y=1, xref='paper', yref='paper', text='Receiver', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.32, y=1, xref='paper', yref='paper', text='Currency', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.47, y=1, xref='paper', yref='paper', text='MX/MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.62, y=1, xref='paper', yref='paper', text='MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.77, y=1, xref='paper', yref='paper', text='Direction', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.92, y=1, xref='paper', yref='paper', text='Amount', showarrow=False, font=dict(size=14, color='black')),
        ]
    )
    
    return fig

def visualize_banking_flows(df):
    # Ensure the expected columns exist
    required_columns = ['send region', 'receiver region', 'amount']
    missing_columns = [col for col in required_columns if col not in df.columns]
    
    if missing_columns:
        raise ValueError(f"Missing required columns in dataframe: {missing_columns}")
    
    # Check if year column exists, if not, provide a default
    if 'year' not in df.columns:
        print("Warning: 'year' column not found in data. Creating a dummy year column with value 2023.")
        df['year'] = 2023
    
    # Ensure all regions are in the coordinates dictionary
    unknown_send_regions = set(df['send region'].unique()) - set(region_coordinates.keys())
    unknown_recv_regions = set(df['receiver region'].unique()) - set(region_coordinates.keys())
    
    if unknown_send_regions:
        print(f"Warning: Unknown send regions: {unknown_send_regions}")
    if unknown_recv_regions:
        print(f"Warning: Unknown receiver regions: {unknown_recv_regions}")
    
    # Create the flow map
    flow_map = create_flow_map(df)
    
    # Get unique regions for dropdowns
    all_regions = sorted(list(set(df['send region'].unique()).union(set(df['receiver region'].unique()))))
    
    # Get unique years for the year dropdown
    years = sorted(df['year'].unique())
    
    # Create dropdown widgets
    send_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Sender Region:',
        layout=Layout(width='300px')
    )
    
    receive_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Receiver Region:',
        layout=Layout(width='300px')
    )
    
    # Create year dropdown
    year_dropdown = widgets.Dropdown(
        options=['All'] + list(years),
        value='All',
        description='Year:',
        layout=Layout(width='300px')
    )
    
    # Create output area for the Sankey diagram
    sankey_output = widgets.Output()
    
    def update_sankey(sender_region, receiver_region, year):
        with sankey_output:
            sankey_output.clear_output(wait=True)
            send_region = None if sender_region == 'All' else sender_region
            receive_region = None if receiver_region == 'All' else receiver_region
            selected_year = None if year == 'All' else year
            sankey_fig = create_sankey_for_regions(df, send_region, receive_region, selected_year)
            sankey_fig.show()
    
    # Create interactive widget with all three dropdowns
    interactive_widget = interactive(
        update_sankey,
        sender_region=send_dropdown,
        receiver_region=receive_dropdown,
        year=year_dropdown
    )
    
    # Display everything
    display(flow_map)
    display(interactive_widget)
    display(sankey_output)
    
    # Initial display
    update_sankey('All', 'All', 'All')
    
    return flow_map

# Example usage - comment these out if you're importing this as a module


In [20]:
df.columns

Index(['Count', 'sender', 'receiver', 'amount', 'currency', 'Mt', 'year',
       'direction', 'category', 'send region', 'receiver region', 'mx/mt',
       'transaction_status', 'payment_method'],
      dtype='object')

In [21]:
def create_sankey_for_regions(df, send_region=None, receive_region=None, year=None):
    """
    Create a Sankey diagram for transactions between specified regions,
    showing a flow from sender → receiver → currency → mx/mt → mt → direction → 
    transaction_status → payment_method → amount
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame containing transaction data
    send_region : str, optional
        Region to filter senders by
    receive_region : str, optional
        Region to filter receivers by
    year : int, optional
        Year to filter transactions by
    
    Returns:
    --------
    plotly.graph_objects.Figure
        A Sankey diagram figure
    """
    import pandas as pd
    import numpy as np
    import plotly.graph_objects as go
    
    # Create a copy to avoid modifying the original
    filtered_df = df.copy()
    
    # Filter data if regions are specified
    title_parts = []
    
    if send_region:
        filtered_df = filtered_df[filtered_df['send region'] == send_region]
        title_parts.append(f"from {send_region}")
    
    if receive_region:
        filtered_df = filtered_df[filtered_df['receiver region'] == receive_region]
        title_parts.append(f"to {receive_region}")
    
    # Filter by year if specified
    if year and 'year' in filtered_df.columns:
        filtered_df = filtered_df[filtered_df['year'] == year]
        title_parts.append(f"in {year}")
    
    if title_parts:
        title = f"Transactions {' '.join(title_parts)}"
    else:
        title = "All Regional Transactions"
    
    # Ensure amount categories exist (create if missing)
    if 'amount_category' not in filtered_df.columns:
        filtered_df['amount_category'] = pd.cut(
            filtered_df['amount'], 
            bins=[0, 1e6, 5e6, 1e7, float('inf')], 
            labels=['< 1M', '1M-5M', '5M-10M', '> 10M']
        )
    
    # Check if we have data after all the filters
    if filtered_df.empty:
        fig = go.Figure()
        fig.update_layout(
            title_text=f"No data available for {title}",
            annotations=[dict(
                text="No transactions found for the selected criteria",
                xref="paper", yref="paper",
                x=0.5, y=0.5, showarrow=False,
                font=dict(size=20)
            )]
        )
        return fig
    
    # Make sure all columns are properly formatted as strings
    str_columns = ['sender', 'receiver', 'currency', 'mx/mt', 'Mt', 'direction', 
                'transaction_status', 'payment_method', 'amount_category']
    for col in str_columns:
        if col in filtered_df.columns and filtered_df[col].dtype != 'str':
            filtered_df[col] = filtered_df[col].astype(str)
    
    # Create the flows according to the specified path
    # sender → receiver
    flow1 = filtered_df.groupby(['sender', 'receiver']).size().reset_index(name='count')
    
    # receiver → currency
    flow2 = filtered_df.groupby(['receiver', 'currency']).size().reset_index(name='count')
    
    # currency → mx/mt
    flow3 = filtered_df.groupby(['currency', 'mx/mt']).size().reset_index(name='count')
    
    # mx/mt → mt
    flow4 = filtered_df.groupby(['mx/mt', 'Mt']).size().reset_index(name='count')
    
    # mt → direction
    flow5 = filtered_df.groupby(['Mt', 'direction']).size().reset_index(name='count')
    
    # direction → transaction_status (new flow)
    flow6 = filtered_df.groupby(['direction', 'transaction_status']).size().reset_index(name='count')
    
    # transaction_status → payment_method (new flow)
    flow7 = filtered_df.groupby(['transaction_status', 'payment_method']).size().reset_index(name='count')
    
    # payment_method → amount (new flow)
    flow8 = filtered_df.groupby(['payment_method', 'amount_category']).size().reset_index(name='count')
    
    # Get all unique labels
    labels = pd.unique(
        flow1['sender'].tolist() +
        flow1['receiver'].tolist() +
        flow2['currency'].tolist() +
        flow3['mx/mt'].tolist() +
        flow4['Mt'].tolist() +
        flow5['direction'].tolist() +
        flow6['transaction_status'].tolist() +
        flow7['payment_method'].tolist() +
        flow8['amount_category'].astype(str).tolist()
    )
    
    # Map labels to indices
    label_to_index = {label: i for i, label in enumerate(labels)}
    
    # Prepare sources, targets, and values for links
    source, target, value = [], [], []
    link_labels = []
    
    # sender → receiver
    for _, row in flow1.iterrows():
        source.append(label_to_index[row['sender']])
        target.append(label_to_index[row['receiver']])
        value.append(row['count'])
        link_labels.append(f"{row['sender']} → {row['receiver']}<br>Count: {row['count']}")
    
    # receiver → currency
    for _, row in flow2.iterrows():
        source.append(label_to_index[row['receiver']])
        target.append(label_to_index[row['currency']])
        value.append(row['count'])
        link_labels.append(f"{row['receiver']} → {row['currency']}<br>Count: {row['count']}")
    
    # currency → mx/mt
    for _, row in flow3.iterrows():
        source.append(label_to_index[row['currency']])
        target.append(label_to_index[row['mx/mt']])
        value.append(row['count'])
        link_labels.append(f"{row['currency']} → {row['mx/mt']}<br>Count: {row['count']}")
    
    # mx/mt → mt
    for _, row in flow4.iterrows():
        source.append(label_to_index[row['mx/mt']])
        target.append(label_to_index[row['Mt']])
        value.append(row['count'])
        link_labels.append(f"{row['mx/mt']} → {row['Mt']}<br>Count: {row['count']}")
    
    # mt → direction
    for _, row in flow5.iterrows():
        source.append(label_to_index[row['Mt']])
        target.append(label_to_index[row['direction']])
        value.append(row['count'])
        link_labels.append(f"{row['Mt']} → {row['direction']}<br>Count: {row['count']}")
    
    # direction → transaction_status (new flow)
    for _, row in flow6.iterrows():
        source.append(label_to_index[row['direction']])
        target.append(label_to_index[row['transaction_status']])
        value.append(row['count'])
        link_labels.append(f"{row['direction']} → {row['transaction_status']}<br>Count: {row['count']}")
    
    # transaction_status → payment_method (new flow)
    for _, row in flow7.iterrows():
        source.append(label_to_index[row['transaction_status']])
        target.append(label_to_index[row['payment_method']])
        value.append(row['count'])
        link_labels.append(f"{row['transaction_status']} → {row['payment_method']}<br>Count: {row['count']}")
    
    # payment_method → amount_category (new flow)
    for _, row in flow8.iterrows():
        source.append(label_to_index[row['payment_method']])
        target.append(label_to_index[str(row['amount_category'])])
        value.append(row['count'])
        link_labels.append(f"{row['payment_method']} → {row['amount_category']}<br>Count: {row['count']}")
    
    # Set up colors for nodes and links
    num_nodes = len(labels)
    node_colors = ["rgba(31, 119, 180, 0.8)"] * num_nodes  # Default blue color
    
    # Create color groups for different node types
    sender_idx = [label_to_index[label] for label in flow1['sender'].unique()]
    receiver_idx = [label_to_index[label] for label in flow1['receiver'].unique()]
    currency_idx = [label_to_index[label] for label in flow2['currency'].unique()]
    mxmt_idx = [label_to_index[label] for label in flow3['mx/mt'].unique()]
    mt_idx = [label_to_index[label] for label in flow4['Mt'].unique()]
    direction_idx = [label_to_index[label] for label in flow5['direction'].unique()]
    status_idx = [label_to_index[label] for label in flow6['transaction_status'].unique()]
    payment_idx = [label_to_index[label] for label in flow7['payment_method'].unique()]
    amount_idx = [label_to_index[str(label)] for label in flow8['amount_category'].unique()]
    
    # Assign different colors to each node type
    for idx in sender_idx:
        node_colors[idx] = "rgba(214, 39, 40, 0.8)"  # Red for senders
    for idx in receiver_idx:
        node_colors[idx] = "rgba(44, 160, 44, 0.8)"  # Green for receivers
    for idx in currency_idx:
        node_colors[idx] = "rgba(255, 127, 14, 0.8)"  # Orange for currency
    for idx in mxmt_idx:
        node_colors[idx] = "rgba(148, 103, 189, 0.8)"  # Purple for mx/mt
    for idx in mt_idx:
        node_colors[idx] = "rgba(140, 86, 75, 0.8)"   # Brown for Mt
    for idx in direction_idx:
        node_colors[idx] = "rgba(23, 190, 207, 0.8)"  # Cyan for directions
    for idx in status_idx:
        node_colors[idx] = "rgba(188, 189, 34, 0.8)"  # Yellow-green for transaction status
    for idx in payment_idx:
        node_colors[idx] = "rgba(127, 127, 127, 0.8)"  # Gray for payment method
    for idx in amount_idx:
        node_colors[idx] = "rgba(31, 119, 180, 0.8)"  # Blue for amount categories
    
    # Create colors for links based on their source
    link_colors = []
    for s in source:
        # Use a slightly transparent version of the source node color
        base_color = node_colors[s].replace("0.8", "0.6")
        link_colors.append(base_color)
    
    # Create the Sankey diagram
    fig = go.Figure(data=[go.Sankey(
        arrangement="snap",
        node=dict(
            pad=20,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=list(labels),
            color=node_colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value,
            color=link_colors,
            customdata=np.array(link_labels),
            hovertemplate='%{customdata}<extra></extra>'
        )
    )])
    
    # Add annotations to describe each column
    fig.update_layout(
        title_text=title,
        font_size=12,
        height=800,  # Increased height to accommodate more nodes
        width=1200,  # Increased width to accommodate more columns
        annotations=[
            dict(x=0.02, y=1, xref='paper', yref='paper', text='Sender', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.14, y=1, xref='paper', yref='paper', text='Receiver', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.25, y=1, xref='paper', yref='paper', text='Currency', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.37, y=1, xref='paper', yref='paper', text='MX/MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.48, y=1, xref='paper', yref='paper', text='MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.60, y=1, xref='paper', yref='paper', text='Direction', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.71, y=1, xref='paper', yref='paper', text='Status', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.83, y=1, xref='paper', yref='paper', text='Payment', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.94, y=1, xref='paper', yref='paper', text='Amount', showarrow=False, font=dict(size=14, color='black')),
        ]
    )
    
    return fig

def visualize_banking_flows(df):
    # Ensure the expected columns exist
    required_columns = ['send region', 'receiver region', 'amount', 'transaction_status', 'payment_method']
    missing_columns = [col for col in required_columns if col not in df.columns]
    
    if missing_columns:
        raise ValueError(f"Missing required columns in dataframe: {missing_columns}")
    
    # Check if year column exists, if not, provide a default
    if 'year' not in df.columns:
        print("Warning: 'year' column not found in data. Creating a dummy year column with value 2023.")
        df['year'] = 2023
    
    # Ensure all regions are in the coordinates dictionary
    unknown_send_regions = set(df['send region'].unique()) - set(region_coordinates.keys())
    unknown_recv_regions = set(df['receiver region'].unique()) - set(region_coordinates.keys())
    
    if unknown_send_regions:
        print(f"Warning: Unknown send regions: {unknown_send_regions}")
    if unknown_recv_regions:
        print(f"Warning: Unknown receiver regions: {unknown_recv_regions}")
    
    # Create the flow map
    flow_map = create_flow_map(df)
    
    # Get unique regions for dropdowns
    all_regions = sorted(list(set(df['send region'].unique()).union(set(df['receiver region'].unique()))))
    
    # Get unique years for the year dropdown
    years = sorted(df['year'].unique())
    
    # Create dropdown widgets
    send_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Sender Region:',
        layout=Layout(width='300px')
    )
    
    receive_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Receiver Region:',
        layout=Layout(width='300px')
    )
    
    # Create year dropdown
    year_dropdown = widgets.Dropdown(
        options=['All'] + list(years),
        value='All',
        description='Year:',
        layout=Layout(width='300px')
    )
    
    # Create output area for the Sankey diagram
    sankey_output = widgets.Output()
    
    def update_sankey(sender_region, receiver_region, year):
        with sankey_output:
            sankey_output.clear_output(wait=True)
            send_region = None if sender_region == 'All' else sender_region
            receive_region = None if receiver_region == 'All' else receiver_region
            selected_year = None if year == 'All' else year
            sankey_fig = create_sankey_for_regions(df, send_region, receive_region, selected_year)
            sankey_fig.show()
    
    # Create interactive widget with all three dropdowns
    interactive_widget = interactive(
        update_sankey,
        sender_region=send_dropdown,
        receiver_region=receive_dropdown,
        year=year_dropdown
    )
    
    # Display everything
    display(flow_map)
    display(interactive_widget)
    display(sankey_output)
    
    # Initial display
    update_sankey('All', 'All', 'All')
    
    return flow_map
visualize_banking_flows(df) 


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



interactive(children=(Dropdown(description='Sender Region:', layout=Layout(width='300px'), options=('All', np.…

Output()

In [62]:
def create_sankey_for_regions(df, send_region=None, receive_region=None, year=None):
    """
    Create an enhanced Sankey diagram for transactions between specified regions,
    with hover animations and flow animations for the links.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame containing transaction data
    send_region : str, optional
        Region to filter senders by
    receive_region : str, optional
        Region to filter receivers by
    year : int, optional
        Year to filter transactions by
    
    Returns:
    --------
    plotly.graph_objects.Figure
        A Sankey diagram figure with enhanced animations
    """
    import pandas as pd
    import numpy as np
    import plotly.graph_objects as go
    
    # Create a copy to avoid modifying the original
    filtered_df = df.copy()
    
    # Filter data if regions are specified
    title_parts = []
    
    if send_region:
        filtered_df = filtered_df[filtered_df['send region'] == send_region]
        title_parts.append(f"from {send_region}")
    
    if receive_region:
        filtered_df = filtered_df[filtered_df['receiver region'] == receive_region]
        title_parts.append(f"to {receive_region}")
    
    # Filter by year if specified
    if year and 'year' in filtered_df.columns:
        filtered_df = filtered_df[filtered_df['year'] == year]
        title_parts.append(f"in {year}")
    
    if title_parts:
        title = f"Transactions {' '.join(title_parts)}"
    else:
        title = "All Regional Transactions"
    
    # Ensure amount categories exist (create if missing)
    if 'amount_category' not in filtered_df.columns:
        filtered_df['amount_category'] = pd.cut(
            filtered_df['amount'], 
            bins=[0, 1e6, 5e6, 1e7, float('inf')], 
            labels=['< 1M', '1M-5M', '5M-10M', '> 10M']
        )
    
    # Check if we have data after all the filters
    if filtered_df.empty:
        fig = go.Figure()
        fig.update_layout(
            title_text=f"No data available for {title}",
            annotations=[dict(
                text="No transactions found for the selected criteria",
                xref="paper", yref="paper",
                x=0.5, y=0.5, showarrow=False,
                font=dict(size=20)
            )]
        )
        return fig
    
    # Make sure all columns are properly formatted as strings
    str_columns = ['sender', 'receiver', 'currency', 'mx/mt', 'Mt', 'direction', 
                  'transaction_status', 'payment_method', 'amount_category']
    for col in str_columns:
        if col in filtered_df.columns and filtered_df[col].dtype != 'str':
            filtered_df[col] = filtered_df[col].astype(str)
    
    # Create the flows according to the specified path
    # sender → receiver
    flow1 = filtered_df.groupby(['sender', 'receiver']).size().reset_index(name='count')
    
    # receiver → currency
    flow2 = filtered_df.groupby(['receiver', 'currency']).size().reset_index(name='count')
    
    # currency → mx/mt
    flow3 = filtered_df.groupby(['currency', 'mx/mt']).size().reset_index(name='count')
    
    # mx/mt → mt
    flow4 = filtered_df.groupby(['mx/mt', 'Mt']).size().reset_index(name='count')
    
    # mt → direction
    flow5 = filtered_df.groupby(['Mt', 'direction']).size().reset_index(name='count')
    
    # direction → transaction_status (new flow)
    flow6 = filtered_df.groupby(['direction', 'transaction_status']).size().reset_index(name='count')
    
    # transaction_status → payment_method (new flow)
    flow7 = filtered_df.groupby(['transaction_status', 'payment_method']).size().reset_index(name='count')
    
    # payment_method → amount (new flow)
    flow8 = filtered_df.groupby(['payment_method', 'amount_category']).size().reset_index(name='count')
    
    # Get all unique labels
    labels = pd.unique(
        flow1['sender'].tolist() +
        flow1['receiver'].tolist() +
        flow2['currency'].tolist() +
        flow3['mx/mt'].tolist() +
        flow4['Mt'].tolist() +
        flow5['direction'].tolist() +
        flow6['transaction_status'].tolist() +
        flow7['payment_method'].tolist() +
        flow8['amount_category'].astype(str).tolist()
    )
    
    # Map labels to indices
    label_to_index = {label: i for i, label in enumerate(labels)}
    
    # Prepare sources, targets, and values for links
    source, target, value = [], [], []
    link_labels = []
    
    # sender → receiver
    for _, row in flow1.iterrows():
        source.append(label_to_index[row['sender']])
        target.append(label_to_index[row['receiver']])
        value.append(row['count'])
        link_labels.append(f"{row['sender']} → {row['receiver']}<br>Count: {row['count']}")
    
    # receiver → currency
    for _, row in flow2.iterrows():
        source.append(label_to_index[row['receiver']])
        target.append(label_to_index[row['currency']])
        value.append(row['count'])
        link_labels.append(f"{row['receiver']} → {row['currency']}<br>Count: {row['count']}")
    
    # currency → mx/mt
    for _, row in flow3.iterrows():
        source.append(label_to_index[row['currency']])
        target.append(label_to_index[row['mx/mt']])
        value.append(row['count'])
        link_labels.append(f"{row['currency']} → {row['mx/mt']}<br>Count: {row['count']}")
    
    # mx/mt → mt
    for _, row in flow4.iterrows():
        source.append(label_to_index[row['mx/mt']])
        target.append(label_to_index[row['Mt']])
        value.append(row['count'])
        link_labels.append(f"{row['mx/mt']} → {row['Mt']}<br>Count: {row['count']}")
    
    # mt → direction
    for _, row in flow5.iterrows():
        source.append(label_to_index[row['Mt']])
        target.append(label_to_index[row['direction']])
        value.append(row['count'])
        link_labels.append(f"{row['Mt']} → {row['direction']}<br>Count: {row['count']}")
    
    # direction → transaction_status (new flow)
    for _, row in flow6.iterrows():
        source.append(label_to_index[row['direction']])
        target.append(label_to_index[row['transaction_status']])
        value.append(row['count'])
        link_labels.append(f"{row['direction']} → {row['transaction_status']}<br>Count: {row['count']}")
    
    # transaction_status → payment_method (new flow)
    for _, row in flow7.iterrows():
        source.append(label_to_index[row['transaction_status']])
        target.append(label_to_index[row['payment_method']])
        value.append(row['count'])
        link_labels.append(f"{row['transaction_status']} → {row['payment_method']}<br>Count: {row['count']}")
    
    # payment_method → amount_category (new flow)
    for _, row in flow8.iterrows():
        source.append(label_to_index[row['payment_method']])
        target.append(label_to_index[str(row['amount_category'])])
        value.append(row['count'])
        link_labels.append(f"{row['payment_method']} → {row['amount_category']}<br>Count: {row['count']}")
    
    # Set up colors for nodes and links
    num_nodes = len(labels)
    node_colors = ["rgba(31, 119, 180, 0.8)"] * num_nodes  # Default blue color
    
    # Create color groups for different node types
   
    sender_idx = [label_to_index[label] for label in pd.Series(flow1['sender']).unique()]
    receiver_idx = [label_to_index[label] for label in pd.Series(flow1['receiver']).unique()]
    currency_idx = [label_to_index[label] for label in pd.Series(flow2['currency']).unique()]
    mxmt_idx = [label_to_index[label] for label in pd.Series(flow3['mx/mt']).unique()]
    mt_idx = [label_to_index[label] for label in pd.Series(flow4['Mt']).unique()]
    direction_idx = [label_to_index[label] for label in pd.Series(flow5['direction']).unique()]
    status_idx = [label_to_index[label] for label in pd.Series(flow6['transaction_status']).unique()]
    payment_idx = [label_to_index[label] for label in pd.Series(flow7['payment_method']).unique()]
    amount_idx = [label_to_index[str(label)] for label in pd.Series(flow8['amount_category']).unique()]
    # Assign different colors to each node type
    for idx in sender_idx:
        node_colors[idx] = "rgba(214, 39, 40, 0.8)"  # Red for senders
    for idx in receiver_idx:
        node_colors[idx] = "rgba(44, 160, 44, 0.8)"  # Green for receivers
    for idx in currency_idx:
        node_colors[idx] = "rgba(255, 127, 14, 0.8)"  # Orange for currency
    for idx in mxmt_idx:
        node_colors[idx] = "rgba(148, 103, 189, 0.8)"  # Purple for mx/mt
    for idx in mt_idx:
        node_colors[idx] = "rgba(140, 86, 75, 0.8)"   # Brown for Mt
    for idx in direction_idx:
        node_colors[idx] = "rgba(23, 190, 207, 0.8)"  # Cyan for directions
    for idx in status_idx:
        node_colors[idx] = "rgba(188, 189, 34, 0.8)"  # Yellow-green for transaction status
    for idx in payment_idx:
        node_colors[idx] = "rgba(127, 127, 127, 0.8)"  # Gray for payment method
    for idx in amount_idx:
        node_colors[idx] = "rgba(31, 119, 180, 0.8)"  # Blue for amount categories
    
    # Create base and hover colors for links based on their source
    link_colors = []
    link_hover_colors = []
    
    for s in source:
        # Normal state: semi-transparent
        base_color = node_colors[s].replace("0.8", "0.4")  # More transparent for normal state
        # Hover state: more opaque and brighter
        hover_color = node_colors[s].replace("0.8", "0.9")  # More opaque for hover state
        
        link_colors.append(base_color)
        link_hover_colors.append(hover_color)
    
    # Create the Sankey diagram with enhanced styling
    fig = go.Figure(data=[go.Sankey(
        arrangement="snap",
        node=dict(
            pad=20,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=list(labels),
            color=node_colors,
            # Add hover effect to nodes
            hoverinfo="all",
            hoverlabel=dict(
                bgcolor="white",
                font_size=14,
                font_family="Arial"
            )
        ),
        link=dict(
            source=source,
            target=target,
            value=value,
            color=link_colors,
            # Add animated hover effect for links
            customdata=np.array(link_labels),
            hovertemplate='%{customdata}<extra></extra>',
            hoverlabel=dict(
                bgcolor="white",
                font_size=14,
                font_family="Arial"
            )
        )
    )])
    
    # Add flow animation using frames
    # We'll create frames to animate link opacity to simulate flow
    frames = []
    
    # Create several frames with different link opacities to simulate flow
    for i in range(10):
        # Generate a flow pattern that moves from left to right
        phase = i / 10.0
        flow_colors = []
        
        for s, t in zip(source, target):
            # Calculate distance from left to right (normalized)
            pos = (s + t) / (2 * num_nodes)
            
            # Create a wave pattern that moves across the links
            intensity = 0.5 + 0.4 * np.sin(2 * np.pi * (pos - phase))
            
            # Apply the intensity to the base color
            base_color = node_colors[s].replace("0.8", str(intensity))
            flow_colors.append(base_color)
        
        # Create a frame with the current flow pattern
        frames.append(go.Frame(
            data=[go.Sankey(
                link=dict(color=flow_colors)
            )],
            name=f"frame{i}"
        ))
    
    # Add frames to the figure
    fig.frames = frames
    
    # Add play button for animation
    fig.update_layout(
        updatemenus=[
            dict(
                type="buttons",
                showactive=False,
                buttons=[
                    dict(
                        label="Play Flow",
                        method="animate",
                        args=[
                            None,
                            dict(
                                frame=dict(duration=200, redraw=True),
                                fromcurrent=True,
                                mode="immediate",
                                transition=dict(duration=200)
                            )
                        ]
                    )
                ],
                x=0.1,
                y=1.15,
            )
        ]
    )
    
    # Add annotations to describe each column
    fig.update_layout(
        title_text=title,
        font_size=12,
        height=800,  # Increased height to accommodate more nodes
        width=1200,  # Increased width to accommodate more columns
        # Add hover mode settings to improve interactivity
        hovermode="closest",
        hoverdistance=10,
        # Add transition settings for smoother animations
        transition=dict(
            duration=500,
            easing="cubic-in-out"
        ),
        annotations=[
            dict(x=0.02, y=1, xref='paper', yref='paper', text='Sender', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.14, y=1, xref='paper', yref='paper', text='Receiver', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.25, y=1, xref='paper', yref='paper', text='Currency', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.37, y=1, xref='paper', yref='paper', text='MX/MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.48, y=1, xref='paper', yref='paper', text='MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.60, y=1, xref='paper', yref='paper', text='Direction', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.71, y=1, xref='paper', yref='paper', text='Status', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.83, y=1, xref='paper', yref='paper', text='Payment', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.94, y=1, xref='paper', yref='paper', text='Amount', showarrow=False, font=dict(size=14, color='black')),
        ]
    )
    
    # Add custom CSS for hover effects
    fig.update_layout(
        hoverlabel_bgcolor="white",
        hoverlabel_font_size=14,
        hoverlabel_font_family="Arial"
    )
    
    # Add configuration options for better interactivity
    fig.update_layout(
        dragmode="pan",
        clickmode="event+select"
    )
    
    return fig

def visualize_banking_flows(df):
    """
    Create an interactive visualization of banking flows with enhanced animations.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame containing transaction data
    
    Returns:
    --------
    plotly.graph_objects.Figure
        The flow map visualization
    """
    # Ensure the expected columns exist
    required_columns = ['send region', 'receiver region', 'amount', 'transaction_status', 'payment_method']
    missing_columns = [col for col in required_columns if col not in df.columns]

    # Raise error if essential columns are missing
    if missing_columns:
        raise ValueError(f"Missing required columns in dataframe: {missing_columns}")

    # Add default year if not present
    if 'year' not in df.columns:
        print("Warning: 'year' column not found in data. Creating a dummy year column with value 2023.")
        df['year'] = 2023
    else:
        # Fill any missing year values with a default
        df['year'] = df['year'].fillna(2023)

    # Ensure all regions are in the coordinates dictionary
    send_regions = df['send region'].dropna().unique()
    recv_regions = df['receiver region'].dropna().unique()

    unknown_send_regions = set(send_regions) - set(region_coordinates.keys())
    unknown_recv_regions = set(recv_regions) - set(region_coordinates.keys())

    if unknown_send_regions:
        print(f"Warning: Unknown send regions: {unknown_send_regions}")
    if unknown_recv_regions:
        print(f"Warning: Unknown receiver regions: {unknown_recv_regions}")

    # Build the flow map (assumes create_flow_map is already defined)
    flow_map = create_flow_map(df)

    # Collect unique regions for dropdowns (excluding missing values)
    all_regions = sorted(set(send_regions).union(set(recv_regions)))

    # Collect valid years (filtering non-numeric entries, if any)
    years = sorted(df['year'].dropna().unique())

    
    # Create dropdown widgets with improved styling
    send_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Sender Region:',
        layout=Layout(width='300px'),
        style={'description_width': 'initial'}
    )
    
    receive_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Receiver Region:',
        layout=Layout(width='300px'),
        style={'description_width': 'initial'}
    )
    
    # Create year dropdown
    year_dropdown = widgets.Dropdown(
        options=['All'] + list(years),
        value='All',
        description='Year:',
        layout=Layout(width='300px'),
        style={'description_width': 'initial'}
    )
    
    # Add animation toggle button
    animation_button = widgets.ToggleButton(
        value=False,
        description='Toggle Flow Animation',
        disabled=False,
        button_style='info',
        tooltip='Toggle the flow animation on/off',
        icon='play'
    )
    
    # Create output area for the Sankey diagram
    sankey_output = widgets.Output()
    
    # Create status message for user feedback
    status_message = widgets.HTML(
        value="<div style='padding: 10px; color: #555;'>Select options and click Update to visualize banking flows.</div>"
    )
    
    # Create an update button for better user experience
    update_button = widgets.Button(
        description='Update Visualization',
        button_style='primary',
        tooltip='Click to update the visualization',
        icon='refresh'
    )
    
    def update_sankey(sender_region, receiver_region, year, animate):
        with sankey_output:
            sankey_output.clear_output(wait=True)
            status_message.value = "<div style='padding: 10px; color: #555;'>Generating visualization...</div>"
            
            send_region = None if sender_region == 'All' else sender_region
            receive_region = None if receiver_region == 'All' else receiver_region
            selected_year = None if year == 'All' else year
            
            # Create the Sankey diagram
            sankey_fig = create_sankey_for_regions(df, send_region, receive_region, selected_year)
            
            # Start animation if toggle is on
            if animate:
                sankey_fig.update_layout(
                    updatemenus=[dict(
                        type="buttons",
                        showactive=False,
                        buttons=[dict(
                            label="Play Flow",
                            method="animate",
                            args=[None, dict(frame=dict(duration=200, redraw=True), fromcurrent=True, mode="immediate")]
                        )],
                        active=0
                    )]
                )
            
            # Display the figure
            sankey_fig.show()
            
            # Update status message
            filters = []
            if send_region:
                filters.append(f"sender: {send_region}")
            if receive_region:
                filters.append(f"receiver: {receive_region}")
            if selected_year:
                filters.append(f"year: {selected_year}")
                
            if filters:
                filter_text = ", ".join(filters)
                status_message.value = f"<div style='padding: 10px; color: #555;'>Showing transactions with {filter_text}</div>"
            else:
                status_message.value = "<div style='padding: 10px; color: #555;'>Showing all transactions</div>"
    
    def on_update_button_click(b):
        update_sankey(send_dropdown.value, receive_dropdown.value, year_dropdown.value, animation_button.value)
    
    update_button.on_click(on_update_button_click)
    
    # Layout the widgets in a more organized way
    filters_box = widgets.HBox([send_dropdown, receive_dropdown, year_dropdown])
    controls_box = widgets.HBox([animation_button, update_button])
    
    # Create a styled container for the interface
    container = widgets.VBox([
        widgets.HTML("<h2 style='color: #333;'>Banking Flows Analysis Dashboard</h2>"),
        widgets.HTML("<p style='color: #555;'>Use the controls below to explore transaction flows between regions.</p>"),
        filters_box,
        controls_box,
        status_message,
        sankey_output
    ], layout=Layout(border='1px solid #ddd', padding='20px', margin='10px'))
    
    # Display everything
    display(flow_map)
    display(container)
    
    # Initial display
    update_sankey('All', 'All', 'All', animation_button.value)
    
    return flow_map

In [63]:
visualize_banking_flows(df) 



VBox(children=(HTML(value="<h2 style='color: #333;'>Banking Flows Analysis Dashboard</h2>"), HTML(value="<p st…

In [83]:
# Dictionary of region coordinates (latitude, longitude)
region_coordinates = {
    'North America': (40.0, -100.0),
    'South America': (-15.0, -60.0),
    'Europe': (50.0, 10.0),
    'Africa': (0.0, 20.0),
    'Middle East': (25.0, 45.0),
    'Asia': (30.0, 100.0),
    'Australia': (-25.0, 135.0),
    'Caribbean': (20.0, -75.0),
    'Central America': (15.0, -90.0),
    'Southeast Asia': (10.0, 105.0),
    'East Asia': (35.0, 120.0),
    'South Asia': (25.0, 80.0),
    'Central Asia': (45.0, 60.0),
    'Eastern Europe': (55.0, 30.0),
    'Northern Europe': (60.0, 15.0),
    'Southern Europe': (40.0, 15.0),
    'Western Europe': (48.0, 5.0),
    'Northern Africa': (25.0, 15.0),
    'Western Africa': (10.0, 0.0),
    'Eastern Africa': (5.0, 35.0),
    'Southern Africa': (-25.0, 25.0),
    'Central Africa': (0.0, 20.0),
    'Oceania': (-10.0, 150.0),
    'Pacific Islands': (0.0, 160.0)
}
import plotly.express as px
import plotly.express as px
import pandas as pd

def plot_transaction_time_series(df, 
                                  value_column='amount',
                                  time_column='year',
                                  group_by='send bank',  # Can also be 'receiver bank'
                                  aggfunc='sum',
                                  title='Transaction Trends Over Time by Bank',
                                  status_filter=None,
                                  payment_filter=None,
                                  region_filter=None):
    """
    Create a detailed interactive time series plot for transaction volume or amount over time, grouped by banks.

    Parameters:
    - df (pd.DataFrame): The transaction data.
    - value_column (str): Column to aggregate ('amount' or 'Count').
    - time_column (str): Column representing time ('year', 'month', or full datetime).
    - group_by (str): Column to group lines by (e.g., 'send bank', 'receiver bank').
    - aggfunc (str): Aggregation function ('sum' or 'count').
    - title (str): Title of the plot.
    - status_filter (str): Optional filter for 'transaction_status'.
    - payment_filter (str): Optional filter for 'payment_method'.
    - region_filter (str): Optional filter for specific region ('send region' or 'receiver region').

    Returns:
    - plotly.graph_objs._figure.Figure
    """
    # Apply optional filters
    if status_filter:
        df = df[df['transaction_status'] == status_filter]
    if payment_filter:
        df = df[df['payment_method'] == payment_filter]
    if region_filter and region_filter in df.columns:
        df = df[df[region_filter].notna()]  # or specify a region like df[df[region_filter] == 'Delhi']

    # Group and aggregate
    grouped = df.groupby([time_column, group_by])[value_column].agg(aggfunc).reset_index()
    grouped.rename(columns={value_column: 'value'}, inplace=True)

    fig = px.line(
        grouped,
        x=time_column,
        y='value',
        color=group_by,
        markers=True,
        title=title,
        labels={
            time_column: time_column.replace('_', ' ').title(),
            'value': value_column.title(),
            group_by: group_by.replace('_', ' ').title()
        }
    )

    fig.update_layout(
        xaxis=dict(dtick=1),
        yaxis_title=value_column.title(),
        legend_title=group_by.replace('_', ' ').title()
    )
    
    fig.show()
    return fig



def plot_transaction_heatmap(df, 
                              value_column='Count', 
                              aggfunc='count', 
                              title='Heatmap of Transaction Volumes Between Regions',
                              status_filter=None):
    """
    Create an interactive heatmap showing transaction volumes between regions.

    Parameters:
    - df (pd.DataFrame): The transaction data.
    - value_column (str): The column to aggregate (e.g., 'Count' for frequency or 'amount' for value).
    - aggfunc (str): Aggregation function: 'count' or 'sum'.
    - title (str): Title of the heatmap.
    - status_filter (str): Optional filter for transaction_status (e.g., 'Completed').

    Returns:
    - plotly.graph_objs._figure.Figure
    """
    if status_filter:
        df = df[df['transaction_status'] == status_filter]

    # Create pivot table
    matrix = df.pivot_table(
        index='send region',
        columns='receiver region',
        values=value_column,
        aggfunc=aggfunc,
        fill_value=0
    )

    # Generate heatmap
    fig = px.imshow(
        matrix,
        labels=dict(x="Receiver Region", y="Sender Region", color=value_column.title()),
        x=matrix.columns,
        y=matrix.index,
        color_continuous_scale='Viridis',
        text_auto=True
    )

    fig.update_layout(
        title=title,
        xaxis_title="Receiver Region",
        yaxis_title="Sender Region",
        autosize=True
    )

    fig.show()
    return fig

def create_sankey_for_regions(df, send_region=None, receive_region=None, year=None):
    """
    Create an enhanced Sankey diagram for transactions between specified regions,
    with hover animations and flow animations for the links.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame containing transaction data
    send_region : str, optional
        Region to filter senders by
    receive_region : str, optional
        Region to filter receivers by
    year : int, optional
        Year to filter transactions by
    
    Returns:
    --------
    plotly.graph_objects.Figure
        A Sankey diagram figure with enhanced animations
    """
    import pandas as pd
    import numpy as np
    import plotly.graph_objects as go
    
    # Create a copy to avoid modifying the original
    filtered_df = df.copy()
    
    # Filter data if regions are specified
    title_parts = []
    
    if send_region:
        filtered_df = filtered_df[filtered_df['send region'] == send_region]
        title_parts.append(f"from {send_region}")
    
    if receive_region:
        filtered_df = filtered_df[filtered_df['receiver region'] == receive_region]
        title_parts.append(f"to {receive_region}")
    
    # Filter by year if specified
    if year and 'year' in filtered_df.columns:
        filtered_df = filtered_df[filtered_df['year'] == year]
        title_parts.append(f"in {year}")
    
    if title_parts:
        title = f"Transactions {' '.join(title_parts)}"
    else:
        title = "All Regional Transactions"
    
    # Ensure amount categories exist (create if missing)
    if 'amount_category' not in filtered_df.columns:
        filtered_df['amount_category'] = pd.cut(
            filtered_df['amount'], 
            bins=[0, 1e6, 5e6, 1e7, float('inf')], 
            labels=['< 1M', '1M-5M', '5M-10M', '> 10M']
        )
    
    # Check if we have data after all the filters
    if filtered_df.empty:
        fig = go.Figure()
        fig.update_layout(
            title_text=f"No data available for {title}",
            annotations=[dict(
                text="No transactions found for the selected criteria",
                xref="paper", yref="paper",
                x=0.5, y=0.5, showarrow=False,
                font=dict(size=20)
            )]
        )
        return fig
    
    # Make sure all columns are properly formatted as strings
    str_columns = ['sender', 'receiver', 'currency', 'mx/mt', 'Mt', 'direction', 
                  'transaction_status', 'payment_method', 'amount_category']
    for col in str_columns:
        if col in filtered_df.columns and filtered_df[col].dtype != 'str':
            filtered_df[col] = filtered_df[col].astype(str)
    
    # Create the flows according to the specified path
    # sender → receiver
    flow1 = filtered_df.groupby(['sender', 'receiver']).size().reset_index(name='count')
    
    # receiver → currency
    flow2 = filtered_df.groupby(['receiver', 'currency']).size().reset_index(name='count')
    
    # currency → mx/mt
    flow3 = filtered_df.groupby(['currency', 'mx/mt']).size().reset_index(name='count')
    
    # mx/mt → mt
    flow4 = filtered_df.groupby(['mx/mt', 'Mt']).size().reset_index(name='count')
    
    # mt → direction
    flow5 = filtered_df.groupby(['Mt', 'direction']).size().reset_index(name='count')
    
    # direction → transaction_status (new flow)
    flow6 = filtered_df.groupby(['direction', 'transaction_status']).size().reset_index(name='count')
    
    # transaction_status → payment_method (new flow)
    flow7 = filtered_df.groupby(['transaction_status', 'payment_method']).size().reset_index(name='count')
    
    # payment_method → amount (new flow)
    flow8 = filtered_df.groupby(['payment_method', 'amount_category']).size().reset_index(name='count')
    
    # Get all unique labels
    labels = pd.unique(
        flow1['sender'].tolist() +
        flow1['receiver'].tolist() +
        flow2['currency'].tolist() +
        flow3['mx/mt'].tolist() +
        flow4['Mt'].tolist() +
        flow5['direction'].tolist() +
        flow6['transaction_status'].tolist() +
        flow7['payment_method'].tolist() +
        flow8['amount_category'].astype(str).tolist()
    )
    
    # Map labels to indices
    label_to_index = {label: i for i, label in enumerate(labels)}
    
    # Prepare sources, targets, and values for links
    source, target, value = [], [], []
    link_labels = []
    
    # sender → receiver
    for _, row in flow1.iterrows():
        source.append(label_to_index[row['sender']])
        target.append(label_to_index[row['receiver']])
        value.append(row['count'])
        link_labels.append(f"{row['sender']} → {row['receiver']}<br>Count: {row['count']}")
    
    # receiver → currency
    for _, row in flow2.iterrows():
        source.append(label_to_index[row['receiver']])
        target.append(label_to_index[row['currency']])
        value.append(row['count'])
        link_labels.append(f"{row['receiver']} → {row['currency']}<br>Count: {row['count']}")
    
    # currency → mx/mt
    for _, row in flow3.iterrows():
        source.append(label_to_index[row['currency']])
        target.append(label_to_index[row['mx/mt']])
        value.append(row['count'])
        link_labels.append(f"{row['currency']} → {row['mx/mt']}<br>Count: {row['count']}")
    
    # mx/mt → mt
    for _, row in flow4.iterrows():
        source.append(label_to_index[row['mx/mt']])
        target.append(label_to_index[row['Mt']])
        value.append(row['count'])
        link_labels.append(f"{row['mx/mt']} → {row['Mt']}<br>Count: {row['count']}")
    
    # mt → direction
    for _, row in flow5.iterrows():
        source.append(label_to_index[row['Mt']])
        target.append(label_to_index[row['direction']])
        value.append(row['count'])
        link_labels.append(f"{row['Mt']} → {row['direction']}<br>Count: {row['count']}")
    
    # direction → transaction_status (new flow)
    for _, row in flow6.iterrows():
        source.append(label_to_index[row['direction']])
        target.append(label_to_index[row['transaction_status']])
        value.append(row['count'])
        link_labels.append(f"{row['direction']} → {row['transaction_status']}<br>Count: {row['count']}")
    
    # transaction_status → payment_method (new flow)
    for _, row in flow7.iterrows():
        source.append(label_to_index[row['transaction_status']])
        target.append(label_to_index[row['payment_method']])
        value.append(row['count'])
        link_labels.append(f"{row['transaction_status']} → {row['payment_method']}<br>Count: {row['count']}")
    
    # payment_method → amount_category (new flow)
    for _, row in flow8.iterrows():
        source.append(label_to_index[row['payment_method']])
        target.append(label_to_index[str(row['amount_category'])])
        value.append(row['count'])
        link_labels.append(f"{row['payment_method']} → {row['amount_category']}<br>Count: {row['count']}")
    
    # Set up colors for nodes and links
    num_nodes = len(labels)
    node_colors = ["rgba(31, 119, 180, 0.8)"] * num_nodes  # Default blue color
    
    # Create color groups for different node types
    sender_idx    = [label_to_index[label] for label in pd.Series(flow1['sender']).unique()]
    receiver_idx  = [label_to_index[label] for label in pd.Series(flow1['receiver']).unique()]
    currency_idx  = [label_to_index[label] for label in pd.Series(flow2['currency']).unique()]
    mxmt_idx      = [label_to_index[label] for label in pd.Series(flow3['mx/mt']).unique()]
    mt_idx        = [label_to_index[label] for label in pd.Series(flow4['Mt']).unique()]
    direction_idx = [label_to_index[label] for label in pd.Series(flow5['direction']).unique()]
    status_idx    = [label_to_index[label] for label in pd.Series(flow6['transaction_status']).unique()]
    payment_idx   = [label_to_index[label] for label in pd.Series(flow7['payment_method']).unique()]
    amount_idx    = [label_to_index[str(label)] for label in pd.Series(flow8['amount_category']).unique()]

    
    # Assign different colors to each node type
    for idx in sender_idx:
        node_colors[idx] = "rgba(214, 39, 40, 0.8)"  # Red for senders
    for idx in receiver_idx:
        node_colors[idx] = "rgba(44, 160, 44, 0.8)"  # Green for receivers
    for idx in currency_idx:
        node_colors[idx] = "rgba(255, 127, 14, 0.8)"  # Orange for currency
    for idx in mxmt_idx:
        node_colors[idx] = "rgba(148, 103, 189, 0.8)"  # Purple for mx/mt
    for idx in mt_idx:
        node_colors[idx] = "rgba(140, 86, 75, 0.8)"   # Brown for Mt
    for idx in direction_idx:
        node_colors[idx] = "rgba(23, 190, 207, 0.8)"  # Cyan for directions
    for idx in status_idx:
        node_colors[idx] = "rgba(188, 189, 34, 0.8)"  # Yellow-green for transaction status
    for idx in payment_idx:
        node_colors[idx] = "rgba(127, 127, 127, 0.8)"  # Gray for payment method
    for idx in amount_idx:
        node_colors[idx] = "rgba(31, 119, 180, 0.8)"  # Blue for amount categories
    
    # Create base and hover colors for links based on their source
    link_colors = []
    link_hover_colors = []
    
    for s in source:
        # Normal state: semi-transparent
        base_color = node_colors[s].replace("0.8", "0.4")  # More transparent for normal state
        # Hover state: more opaque and brighter
        hover_color = node_colors[s].replace("0.8", "0.9")  # More opaque for hover state
        
        link_colors.append(base_color)
        link_hover_colors.append(hover_color)
    
    # Create the Sankey diagram with enhanced styling
    fig = go.Figure(data=[go.Sankey(
        arrangement="snap",
        node=dict(
            pad=20,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=list(labels),
            color=node_colors,
            # Add hover effect to nodes
            hoverinfo="all",
            hoverlabel=dict(
                bgcolor="white",
                font_size=14,
                font_family="Arial"
            )
        ),
        link=dict(
            source=source,
            target=target,
            value=value,
            color=link_colors,
            # Add animated hover effect for links
            customdata=np.array(link_labels),
            hovertemplate='%{customdata}<extra></extra>',
            hoverlabel=dict(
                bgcolor="white",
                font_size=14,
                font_family="Arial"
            )
        )
    )])
    
    # Add flow animation using frames
    # We'll create frames to animate link opacity to simulate flow
    frames = []
    
    # Create several frames with different link opacities to simulate flow
    for i in range(10):
        # Generate a flow pattern that moves from left to right
        phase = i / 10.0
        flow_colors = []
        
        for s, t in zip(source, target):
            # Calculate distance from left to right (normalized)
            pos = (s + t) / (2 * num_nodes)
            
            # Create a wave pattern that moves across the links
            intensity = 0.5 + 0.4 * np.sin(2 * np.pi * (pos - phase))
            
            # Apply the intensity to the base color
            base_color = node_colors[s].replace("0.8", str(intensity))
            flow_colors.append(base_color)
        
        # Create a frame with the current flow pattern
        frames.append(go.Frame(
            data=[go.Sankey(
                link=dict(color=flow_colors)
            )],
            name=f"frame{i}"
        ))
    
    # Add frames to the figure
    fig.frames = frames
    
    # Add play button for animation
    fig.update_layout(
        updatemenus=[
            dict(
                type="buttons",
                showactive=False,
                buttons=[
                    dict(
                        label="Play Flow",
                        method="animate",
                        args=[
                            None,
                            dict(
                                frame=dict(duration=200, redraw=True),
                                fromcurrent=True,
                                mode="immediate",
                                transition=dict(duration=200)
                            )
                        ]
                    )
                ],
                x=0.1,
                y=1,
            )
        ]
    )
    
    # Add annotations to describe each column
    fig.update_layout(
        title_text=title,
        font_size=12,
        height=800,  # Increased height to accommodate more nodes
        width=1200,  # Increased width to accommodate more columns
        # Add hover mode settings to improve interactivity
        hovermode="closest",
        hoverdistance=10,
        # Add transition settings for smoother animations
        transition=dict(
            duration=1000,
            easing="cubic-in-out"
        ),
        annotations=[
            dict(x=0.02, y=1, xref='paper', yref='paper', text='Sender', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.14, y=1, xref='paper', yref='paper', text='Receiver', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.25, y=1, xref='paper', yref='paper', text='Currency', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.37, y=1, xref='paper', yref='paper', text='MX/MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.48, y=1, xref='paper', yref='paper', text='MT', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.60, y=1, xref='paper', yref='paper', text='Direction', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.71, y=1, xref='paper', yref='paper', text='Status', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.83, y=1, xref='paper', yref='paper', text='Payment', showarrow=False, font=dict(size=14, color='black')),
            dict(x=0.94, y=1, xref='paper', yref='paper', text='Amount', showarrow=False, font=dict(size=14, color='black')),
        ]
    )
    
    # Add custom CSS for hover effects
    fig.update_layout(
        hoverlabel_bgcolor="white",
        hoverlabel_font_size=14,
        hoverlabel_font_family="Arial"
    )
    
    # Add configuration options for better interactivity
    fig.update_layout(
        dragmode="pan",
        clickmode="event+select"
    )
    
    return fig

def create_flow_map(df, animate=False):
    
    import pandas as pd
    import numpy as np
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
    
    # Aggregate transactions by sender and receiver regions
    region_flows = df.groupby(['send region', 'receiver region'])['amount'].sum().reset_index()
    
    # Filter out flows where sender and receiver are the same
    region_flows = region_flows[region_flows['send region'] != region_flows['receiver region']]
    
    # Create a figure with a map
    fig = go.Figure()
    
    # Add a basemap
    fig.add_trace(go.Scattergeo(
        lon=[],
        lat=[],
        mode='markers',
        marker=dict(
            size=1,
            color='rgba(0,0,0,0)'
        ),
        showlegend=False,
        hoverinfo='none'
    ))
    
    # Set up color scale for flows based on amount
    max_amount = region_flows['amount'].max()
    min_amount = region_flows['amount'].min()
    
    # Add region markers
    all_regions = set(region_flows['send region'].unique()).union(set(region_flows['receiver region'].unique()))
    
    # Add only regions that have coordinates
    valid_regions = [region for region in all_regions if region in region_coordinates]
    
    # Create a list of region names, longitudes, and latitudes
    region_names = []
    region_lons = []
    region_lats = []
    
    for region in valid_regions:
        if region in region_coordinates:
            region_names.append(region)
            region_lons.append(region_coordinates[region][1])  # lon
            region_lats.append(region_coordinates[region][0])  # lat
    
    # Add region markers
    fig.add_trace(go.Scattergeo(
        lon=region_lons,
        lat=region_lats,
        text=region_names,
        mode='markers+text',
        marker=dict(
            size=10,
            color='blue',
            line=dict(width=1, color='black')
        ),
        textposition="top center",
        name='Regions',
        hoverinfo='text'
    ))
    
    # Calculate normalized amounts for line widths and colors
    region_flows['normalized_amount'] = (region_flows['amount'] - min_amount) / (max_amount - min_amount)
    region_flows['width'] = 1 + 5 * region_flows['normalized_amount']  # Line width between 1 and 6
    
    # Add flow lines 
    # We'll create separate traces for each flow to enable better control over animations
    for idx, flow in region_flows.iterrows():
        sender = flow['send region']
        receiver = flow['receiver region']
        
        # Skip if we don't have coordinates for either region
        if sender not in region_coordinates or receiver not in region_coordinates:
            continue
        
        # Get coordinates
        sender_lat, sender_lon = region_coordinates[sender]
        receiver_lat, receiver_lon = region_coordinates[receiver]
        
        # Calculate control point for curved lines
        # We'll make a curved line by adding a midpoint that's offset
        mid_lon = (sender_lon + receiver_lon) / 2
        mid_lat = (sender_lat + receiver_lat) / 2
        
        # Add some curvature based on distance
        dist = np.sqrt((receiver_lon - sender_lon)**2 + (receiver_lat - sender_lat)**2)
        curve_strength = 0.1 * dist  # Adjust this factor for more/less curvature
        
        # Calculate perpendicular offset
        dx = receiver_lon - sender_lon
        dy = receiver_lat - sender_lat
        
        # Get perpendicular direction (rotate 90 degrees)
        perp_dx = -dy
        perp_dy = dx
        
        # Normalize and apply curve strength
        magnitude = np.sqrt(perp_dx**2 + perp_dy**2)
        if magnitude > 0:  # Avoid division by zero
            perp_dx = perp_dx / magnitude * curve_strength
            perp_dy = perp_dy / magnitude * curve_strength
        
        # Apply offset to midpoint
        mid_lon += perp_dx
        mid_lat += perp_dy
        
        # Create a smooth curve using multiple points
        lon_points = []
        lat_points = []
        num_points = 20
        
        for i in range(num_points):
            t = i / (num_points - 1)
            # Quadratic Bezier curve
            lon = (1-t)**2 * sender_lon + 2*(1-t)*t * mid_lon + t**2 * receiver_lon
            lat = (1-t)**2 * sender_lat + 2*(1-t)*t * mid_lat + t**2 * receiver_lat
            lon_points.append(lon)
            lat_points.append(lat)
        
        # Calculate color based on normalized amount (blue to red)
        color = f'rgba({int(255 * flow["normalized_amount"])}, 0, {int(255 * (1 - flow["normalized_amount"]))}, 0.8)'
        
        # Add the flow line
        fig.add_trace(go.Scattergeo(
            lon=lon_points,
            lat=lat_points,
            mode='lines',
            line=dict(
                width=flow['width'],
                color=color
            ),
            opacity=0.7,
            name=f"{sender} → {receiver}",
            text=f"{sender} → {receiver}: ${flow['amount']:,.2f}",
            hoverinfo='text',
            customdata=[{
                'sender': sender,
                'receiver': receiver,
                'amount': flow['amount'],
                'normalized_amount': flow['normalized_amount']
            }]
        ))
    
    # Configure the base map
    fig.update_geos(
        showcoastlines=True, coastlinecolor="Black",
        showland=True, landcolor="LightGreen",
        showocean=True, oceancolor="LightBlue",
        showlakes=True, lakecolor="Blue",
        showrivers=True, rivercolor="Blue",
        showcountries=True, countrycolor="Black"
    )
    
    # Add animation frames if requested
    if animate:
        frames = []
        
        for frame_idx in range(20):
            frame_data = []
            
            # First add the basemap and markers (unchanged)
            frame_data.append(fig.data[0])  # Basemap
            frame_data.append(fig.data[1])  # Region markers
            
            # For each flow line, create an animated version
            for flow_idx in range(2, len(fig.data)):
                flow_trace = fig.data[flow_idx]
                
                # Create a progress value that moves along the path
                progress = frame_idx / 20.0
                
                # For animation, we'll show only part of the path up to the current progress
                visible_points = int(progress * len(flow_trace.lon)) + 1
                visible_points = max(2, min(visible_points, len(flow_trace.lon)))
                
                
                animated_trace = go.Scattergeo(
                    lon=flow_trace.lon[:visible_points],
                    lat=flow_trace.lat[:visible_points],
                    mode='lines',
                    line=dict(
                        width=flow_trace.line.width,
                        color=flow_trace.line.color
                    ),
                    opacity=flow_trace.opacity,
                    name=flow_trace.name,
                    text=flow_trace.text,
                    hoverinfo=flow_trace.hoverinfo
                )
                
                frame_data.append(animated_trace)
            
            # Create the frame
            frames.append(go.Frame(
                data=frame_data,
                name=f"frame{frame_idx}"
            ))
        
        
        fig.frames = frames
        
       
        fig.update_layout(
            updatemenus=[
                dict(
                    type="buttons",
                    showactive=False,
                    buttons=[
                        dict(
                            label="Play Flow Animation",
                            method="animate",
                            args=[
                                None,
                                dict(
                                    frame=dict(duration=100, redraw=True),
                                    fromcurrent=True,
                                    mode="immediate",
                                    transition=dict(duration=50)
                                )
                            ]
                        )
                    ],
                    x=0.1,
                    y=0.95,
                )
            ]
        )
    
    fig.update_layout(
        title="Global Banking Transaction Flows",
        height=600,
        showlegend=False,
        geo=dict(
            projection_type="natural earth",
            showframe=False,
            showcountries=True
        )
    )
    
    return fig


def visualize_banking_flows(df):
    import ipywidgets as widgets
    from ipywidgets import Layout
    
    
    required_columns = ['send region', 'receiver region', 'amount', 'transaction_status', 'payment_method']
    missing_columns = [col for col in required_columns if col not in df.columns]
    
    if missing_columns:
        raise ValueError(f"Missing required columns in dataframe: {missing_columns}")
    
    if 'year' not in df.columns:
        print("Warning: 'year' column not found in data. Creating a dummy year column with value 2023.")
        df['year'] = 2023
    
    # Ensure all regions are in the coordinates dictionary
    unknown_send_regions = set(df['send region'].unique()) - set(region_coordinates.keys())
    unknown_recv_regions = set(df['receiver region'].unique()) - set(region_coordinates.keys())
    
    if unknown_send_regions:
        print(f"Warning: Unknown send regions: {unknown_send_regions}")
    if unknown_recv_regions:
        print(f"Warning: Unknown receiver regions: {unknown_recv_regions}")
    
    # Get unique regions for dropdowns
    all_regions = sorted(list(set(df['send region'].unique()).union(set(df['receiver region'].unique()))))
    
    # Get unique years for the year dropdown
    years = sorted(df['year'].unique())
    
    # Create dropdown widgets with improved styling
    send_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Sender Region:',
        layout=Layout(width='300px'),
        style={'description_width': 'initial'}
    )
    
    receive_dropdown = widgets.Dropdown(
        options=['All'] + all_regions,
        value='All',
        description='Receiver Region:',
        layout=Layout(width='300px'),
        style={'description_width': 'initial'}
    )
    
    # Create year dropdown
    year_dropdown = widgets.Dropdown(
        options=['All'] + list(years),
        value='All',
        description='Year:',
        layout=Layout(width='300px'),
        style={'description_width': 'initial'}
    )
    
    # Add animation toggle button
    animation_button = widgets.ToggleButton(
        value=True,  # Default to animation enabled
        description='Flow Animation',
        disabled=False,
        button_style='info',
        tooltip='Toggle the flow animation on/off',
        icon='play'
    )
    
    # Create output areas for both visualizations
    map_output = widgets.Output()
    sankey_output = widgets.Output()
    heatmap_output = widgets.Output()  
    time_series_output=widgets.Output()

    
    # Create status message for user feedback
    status_message = widgets.HTML(
        value="<div style='padding: 10px; color: #555;'>Select options and click Update to visualize banking flows.</div>"
    )
    
    # Create an update button for better user experience
    update_button = widgets.Button(
        description='Update Visualization',
        button_style='primary',
        tooltip='Click to update the visualization',
        icon='refresh'
    )
    
    def update_visualizations(sender_region, receiver_region, year, animate):
        # Filter the dataframe based on selections
        filtered_df = df.copy()
        
        send_region = None if sender_region == 'All' else sender_region
        receive_region = None if receiver_region == 'All' else receiver_region
        selected_year = None if year == 'All' else year
        
        if send_region:
            filtered_df = filtered_df[filtered_df['send region'] == send_region]
        
        if receive_region:
            filtered_df = filtered_df[filtered_df['receiver region'] == receive_region]
        
        if selected_year and 'year' in filtered_df.columns:
            filtered_df = filtered_df[filtered_df['year'] == selected_year]
        
        # Update the map visualization
        with map_output:
            map_output.clear_output(wait=True)
            status_message.value = "<div style='padding: 10px; color: #555;'>Generating map visualization...</div>"
            flow_map = create_flow_map(filtered_df, animate=animate)
            flow_map.show()
        
        # Update the sankey diagram
        with sankey_output:
            sankey_output.clear_output(wait=True)
            status_message.value = "<div style='padding: 10px; color: #555;'>Generating Sankey diagram...</div>"
            sankey_fig = create_sankey_for_regions(filtered_df, send_region, receive_region, selected_year)
            
            # Start animation if toggle is on
            if animate:
                sankey_fig.update_layout(
                    updatemenus=[dict(
                        type="buttons",
                        showactive=False,
                        buttons=[dict(
                            label="Play Flow",
                            method="animate",
                            args=[None, dict(frame=dict(duration=200, redraw=True), fromcurrent=True, mode="immediate")]
                        )],
                        active=0
                    )]
                )
            
            sankey_fig.show()
        with heatmap_output:
            heatmap_output.clear_output(wait=True)
            status_message.value = "<div style='padding: 10px; color: #555;'>Generating heatmap visualization...</div>"
            # Create heatmap for transaction counts
            matrix = filtered_df.pivot_table(
            index='send region',
            columns='receiver region',
            values='amount',  # or another column
            aggfunc='count',  # or 'sum'
            fill_value=0
        )
            
            # Generate heatmap
            heatmap_fig = px.imshow(
                matrix,
                labels=dict(x="Receiver Region", y="Sender Region", color="Transaction Count"),
                x=matrix.columns,
                y=matrix.index,
                color_continuous_scale='Viridis',
                text_auto=True
            )
            
            heatmap_fig.update_layout(
                title="Heatmap of Transaction Volumes Between Regions",
                xaxis_title="Receiver Region",
                yaxis_title="Sender Region",
                autosize=True
            )
            
            heatmap_fig.show()
        # Update the time series visualization
        with time_series_output:
            time_series_output.clear_output(wait=True)
            status_message.value = "<div style='padding: 10px; color: #555;'>Generating time series visualization...</div>"

            try:
                plot_transaction_time_series(
                    filtered_df,
                    value_column='amount',       # or 'Count' if precomputed
                    time_column='year',          # adjust if using datetime column
                    group_by='send region',      # or 'receiver region', etc.
                    aggfunc='sum',               # or 'count'
                    title='Transaction Trends Over Time'
                )
            except Exception as e:
                print(f"Error generating time series: {e}")

        
        
        
        # Update status message
        filters = []
        if send_region:
            filters.append(f"sender: {send_region}")
        if receive_region:
            filters.append(f"receiver: {receive_region}")
        if selected_year:
            filters.append(f"year: {selected_year}")
            
        if filters:
            filter_text = ", ".join(filters)
            status_message.value = f"<div style='padding: 10px; color: #555;'>Showing transactions with {filter_text}</div>"
        else:
            status_message.value = "<div style='padding: 10px; color: #555;'>Showing all transactions</div>"
    
    def on_update_button_click(b):
        update_visualizations(send_dropdown.value, receive_dropdown.value, year_dropdown.value, animation_button.value)
    
    update_button.on_click(on_update_button_click)
    
    # Layout the widgets in a more organized way
    filters_box = widgets.HBox([send_dropdown, receive_dropdown, year_dropdown])
    controls_box = widgets.HBox([animation_button, update_button])
    
    # Create tabs for map and sankey
    tab = widgets.Tab([map_output, sankey_output,heatmap_output,time_series_output])
    tab.set_title(0, 'Flow Map')
    tab.set_title(1, 'Sankey Diagram')
    tab.set_title(2, 'Heatmap')  
    tab.set_title(3, 'Time Series')  



    styled_dashboard = widgets.HTML("""
<div style="
    background-color: #1e1e1e;
    color: white;
    padding: 30px;
    margin: 10px;
    border: 1px solid #444;
    border-radius: 10px;
">
    <h2 style='color: #10a37f;'>Banking Flows Analysis Dashboard</h2>
    <p>Use the controls below to explore transaction flows between regions.</p>
</div>
""")

    # Create a styled container for the interface
    container = widgets.VBox([
        styled_dashboard,

        filters_box,
        controls_box,
        status_message,
        tab
    ], layout=Layout(border='1px solid #ddd', padding='30px', margin='10px'))
    


    dashboard_container = widgets.VBox([
    widgets.HTML("<style>body { background-color: black; }</style>"),
    container  # your main dashboard container
])
    display(dashboard_container)

   
    
    # Initial visualization
    update_visualizations('All', 'All', 'All', animation_button.value)
    
    # Return the final filtered dataframe for reference
    return df

In [84]:
visualize_banking_flows(df) 



VBox(children=(HTML(value='<style>body { background-color: black; }</style>'), VBox(children=(HTML(value='\n<d…

Unnamed: 0,Count,sender,receiver,amount,currency,Mt,year,direction,category,send region,receiver region,mx/mt,transaction_status,payment_method
0,1,Mashreq Bank,United Arab Bank,38111.53,EUR,MT940,2015,Outgoing,Retail Payment,Central Asia,Middle East,MT,Completed,ACH
1,2,Barclays,RAK Bank,128668.30,GBP,MT799,2022,Incoming,FX Settlement,Latin America,Middle East,MT,Pending,Wire Transfer
2,3,Royal Bank of Canada,Emirates NBD,26914.61,EUR,MT103,2023,Outgoing,Retail Payment,Southeast Asia,Middle East,MT,Completed,Wire Transfer
3,4,Bank of China,First Abu Dhabi Bank,1248.98,USD,MT760,2017,Incoming,Trade Finance,Asia Pacific,Middle East,MT,Failed,ACH
4,5,BNP Paribas,Bank of Tokyo-Mitsubishi UFJ,6710.04,GBP,MT940,2016,Incoming,Loan Disbursement,Central Asia,Southeast Asia,MT,Failed,Check
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,496,UBS,National Bank of Fujairah,14928.90,AED,MT799,2015,Incoming,Corporate Payment,South Asia,Middle East,MT,Completed,Credit Card
496,497,Bank of China,Abu Dhabi Commercial Bank,11987567.00,JPY,MT940,2018,Outgoing,Loan Disbursement,Caribbean,Middle East,MT,Completed,ACH
497,498,Industrial and Commercial Bank of China,Credit Suisse,61238.79,USD,MT700,2021,Outgoing,Corporate Payment,South Asia,South Asia,MT,Cancelled,ACH
498,499,Abu Dhabi Commercial Bank,Commercial Bank of Dubai,1165309.00,JPY,MT202,2018,Incoming,Loan Disbursement,Caribbean,Middle East,MT,Completed,SWIFT


In [41]:
import plotly.express as px
import pandas as pd

# Create pivot table: counts of transactions by send and receiver regions
region_matrix = df.pivot_table(index='send region', 
                               columns='receiver region', 
                               values='Count',  # Use transaction count
                               aggfunc='count', 
                               fill_value=0)

# Create an interactive heatmap
fig = px.imshow(
    region_matrix,
    labels=dict(x="Receiver Region", y="Sender Region", color="Transaction Count"),
    x=region_matrix.columns,
    y=region_matrix.index,
    color_continuous_scale='Viridis',
    text_auto=True
)

fig.update_layout(
    title="Heatmap of Transaction Volumes Between Regions",
    xaxis_title="Receiver Region",
    yaxis_title="Sender Region",
    autosize=True
)

fig.show()
