In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

np.random.seed(42)

num_records = 500

banks = [
    "Citibank", "HSBC", "JPMorgan Chase", "Bank of America", "Standard Chartered",
    "Deutsche Bank", "BNP Paribas", "Barclays", "Credit Suisse", "UBS",
    "Emirates NBD", "Abu Dhabi Commercial Bank", "First Abu Dhabi Bank", "Dubai Islamic Bank",
    "Mashreq Bank", "Bank of China", "Industrial and Commercial Bank of China", "Wells Fargo",
    "Goldman Sachs", "Morgan Stanley", "Royal Bank of Canada", "Bank of Tokyo-Mitsubishi UFJ"
]

uae_banks = [
    "Emirates NBD", "Abu Dhabi Commercial Bank", "First Abu Dhabi Bank", 
    "Dubai Islamic Bank", "Mashreq Bank", "Commercial Bank of Dubai", 
    "Abu Dhabi Islamic Bank", "RAK Bank", "National Bank of Fujairah",
    "Sharjah Islamic Bank", "United Arab Bank", "National Bank of Umm Al Qaiwain"
]

currencies = ["USD", "EUR", "GBP", "AED", "JPY", "CHF", "CNY", "CAD", "AUD", "SGD"]
mt_types = ["MT103", "MT202", "MT700", "MT760", "MT799", "MT910", "MT940", "MT950"]
directions = ["Outgoing", "Incoming"]
categories = ["Trade Finance", "Corporate Payment", "Retail Payment", "Interbank Settlement", 
              "Treasury Operations", "Investment", "Loan Disbursement", "FX Settlement"]

regions = [
    "North America", "Europe", "Asia Pacific", "Middle East", "Africa", 
    "Latin America", "Caribbean", "Central Asia", "South Asia", "Southeast Asia"
]

def random_date(start_year=2015, end_year=2023):
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)
    time_between_dates = end_date - start_date
    days_between_dates = time_between_dates.days
    random_number_of_days = random.randrange(days_between_dates)
    return start_date + timedelta(days=random_number_of_days)

data = []
for i in range(num_records):
    # Determine if receiver is UAE-based (75% probability)
    is_uae_receiver = np.random.choice([True, False], p=[0.75, 0.25])

    sender = np.random.choice(banks)
    if is_uae_receiver:
        receiver = np.random.choice(uae_banks)
        receiver_region = "Middle East"  # UAE is in Middle East
    else:

        possible_receivers = [bank for bank in banks if bank != sender]
        receiver = np.random.choice(possible_receivers)
        receiver_region = np.random.choice([r for r in regions if r != "Middle East"])

    count = i + 1
    amount = round(np.random.lognormal(mean=10, sigma=1.5), 2)  
    currency = np.random.choice(currencies, p=[0.4, 0.2, 0.1, 0.15, 0.05, 0.02, 0.03, 0.02, 0.02, 0.01])  # USD most common
    mt = np.random.choice(mt_types)
    year = random_date().year
    direction = np.random.choice(directions)
    category = np.random.choice(categories)
    send_region = np.random.choice(regions)
    mx_mt = "MT"  # All are MT in this simulation

    # Add to data
    data.append([
        count, sender, receiver, amount, currency, mt, year, direction, 
        category, send_region, receiver_region, mx_mt
    ])

columns = ['Count', 'sender', 'receiver', 'amount', 'currency', 'Mt', 'year', 
           'direction', 'category', 'send region', 'receiver region', 'mx/mt']
df = pd.DataFrame(data, columns=columns)

for idx, row in df.iterrows():
    if row['currency'] == 'JPY':
        df.at[idx, 'amount'] = round(row['amount'] * 100)  
    elif row['currency'] == 'AED':
        df.at[idx, 'amount'] = round(row['amount'] * 3.67, 2)  
    elif row['currency'] == 'EUR':
        df.at[idx, 'amount'] = round(row['amount'] * 0.85, 2)
    elif row['currency'] == 'GBP':
        df.at[idx, 'amount'] = round(row['amount'] * 0.75, 2)

df

Unnamed: 0,Count,sender,receiver,amount,currency,Mt,year,direction,category,send region,receiver region,mx/mt
0,1,Mashreq Bank,United Arab Bank,38111.53,EUR,MT940,2021,Outgoing,Retail Payment,Central Asia,Middle East,MT
1,2,Barclays,RAK Bank,128668.30,GBP,MT799,2015,Incoming,FX Settlement,Latin America,Middle East,MT
2,3,Royal Bank of Canada,Emirates NBD,26914.61,EUR,MT103,2018,Outgoing,Retail Payment,Southeast Asia,Middle East,MT
3,4,Bank of China,First Abu Dhabi Bank,1248.98,USD,MT760,2015,Incoming,Trade Finance,Asia Pacific,Middle East,MT
4,5,BNP Paribas,Bank of Tokyo-Mitsubishi UFJ,6710.04,GBP,MT940,2017,Incoming,Loan Disbursement,Central Asia,Southeast Asia,MT
...,...,...,...,...,...,...,...,...,...,...,...,...
495,496,UBS,National Bank of Fujairah,14928.90,AED,MT799,2018,Incoming,Corporate Payment,South Asia,Middle East,MT
496,497,Bank of China,Abu Dhabi Commercial Bank,11987567.00,JPY,MT940,2022,Outgoing,Loan Disbursement,Caribbean,Middle East,MT
497,498,Industrial and Commercial Bank of China,Credit Suisse,61238.79,USD,MT700,2016,Outgoing,Corporate Payment,South Asia,South Asia,MT
498,499,Abu Dhabi Commercial Bank,Commercial Bank of Dubai,1165309.00,JPY,MT202,2015,Incoming,Loan Disbursement,Caribbean,Middle East,MT


In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Define region coordinates (approximate centroids)
region_coordinates = {
    "North America": {"lat": 40.0, "lon": -100.0},
    "Europe": {"lat": 50.0, "lon": 10.0},
    "Asia Pacific": {"lat": 30.0, "lon": 120.0},
    "Middle East": {"lat": 27.0, "lon": 45.0},
    "Africa": {"lat": 5.0, "lon": 20.0},
    "Latin America": {"lat": -10.0, "lon": -60.0},
    "Caribbean": {"lat": 20.0, "lon": -75.0},
    "Central Asia": {"lat": 45.0, "lon": 65.0},
    "South Asia": {"lat": 20.0, "lon": 75.0},
    "Southeast Asia": {"lat": 10.0, "lon": 105.0}
}

# Function to prepare flow data between regions
def prepare_region_flow_data(df):
    # Group by sender and receiver regions
    grouped = df.groupby(['send region', 'receiver region'])['amount'].sum().reset_index()
    
    # Create a copy to avoid modifying the original
    flow_data = grouped.copy()
    
    # Add coordinates for source and target regions
    flow_data['source_lat'] = flow_data['send region'].apply(lambda x: region_coordinates[x]['lat'])
    flow_data['source_lon'] = flow_data['send region'].apply(lambda x: region_coordinates[x]['lon'])
    flow_data['target_lat'] = flow_data['receiver region'].apply(lambda x: region_coordinates[x]['lat'])
    flow_data['target_lon'] = flow_data['receiver region'].apply(lambda x: region_coordinates[x]['lon'])
    
    return flow_data

# Function to create choropleth map with flows
def create_region_flow_map(df):
    # Prepare flow data
    flow_data = prepare_region_flow_data(df)
    
    # Calculate total inflows and outflows for each region
    inflows = df.groupby('receiver region')['amount'].sum().reset_index()
    inflows.columns = ['region', 'inflow']
    
    outflows = df.groupby('send region')['amount'].sum().reset_index()
    outflows.columns = ['region', 'outflow']
    
    # Merge inflows and outflows
    region_flows = pd.merge(inflows, outflows, on='region', how='outer').fillna(0)
    
    # Calculate net flow (inflow - outflow)
    region_flows['net_flow'] = region_flows['inflow'] - region_flows['outflow']
    region_flows['total_volume'] = region_flows['inflow'] + region_flows['outflow']
    
    # Create coordinates dataframe for the choropleth
    region_coords = pd.DataFrame(region_coordinates).T.reset_index()
    region_coords.columns = ['region', 'lat', 'lon']
    
    # Merge coordinates with flow data
    region_data = pd.merge(region_flows, region_coords, on='region', how='left')
    
    # Normalize flow values for better visualization
    max_amount = flow_data['amount'].max()
    flow_data['normalized_amount'] = flow_data['amount'] / max_amount * 10
    
    # Create figure with map
    fig = go.Figure()
    
    # Add base map (with natural earth projection)
    fig.add_trace(go.Choropleth(
        locationmode='country names',
        z=[0],  # Placeholder, we'll use a scatter plot for the regions
        showscale=False,
        colorscale='Blues',
        marker_line_color='darkgray',
        marker_line_width=0.5,
    ))
    
    # Add scatter points for regions (sized by total transaction volume)
    fig.add_trace(go.Scattergeo(
        lon=region_data['lon'],
        lat=region_data['lat'],
        text=region_data['region'] + '<br>Inflow: $' + region_data['inflow'].round(2).astype(str) + 
             '<br>Outflow: $' + region_data['outflow'].round(2).astype(str),
        mode='markers',
        marker=dict(
            size=region_data['total_volume'] / region_data['total_volume'].max() * 30 + 10,
            color=region_data['net_flow'],
            colorscale='RdBu',
            colorbar=dict(
                title='Net Flow<br>(Inflow - Outflow)',
                thickness=15
            ),
            cmid=0,  # Center colorscale at 0
            line=dict(width=1, color='black')
        ),
        name='Regions'
    ))
    
    # Add flow lines between regions
    for _, row in flow_data.iterrows():
        # Skip flows below threshold to avoid cluttering
        if row['normalized_amount'] < 0.5:
            continue
            
        # Create curved path for the flow lines
        fig.add_trace(go.Scattergeo(
            lon=[row['source_lon'], None, row['target_lon']],
            lat=[row['source_lat'], None, row['target_lat']],
            mode='lines',
            line=dict(
                width=row['normalized_amount'],
                color='rgba(80, 80, 220, 0.5)'
            ),
            opacity=0.7,
            hoverinfo='none',
            showlegend=False
        ))
    
    # Update layout
    fig.update_layout(
        title='Global Banking Transaction Flows Between Regions',
        geo=dict(
            projection_type='natural earth',
            showland=True,
            landcolor='rgb(243, 243, 243)',
            countrycolor='rgb(204, 204, 204)',
            showcountries=True,
            oceancolor='rgba(220, 230, 255, 1)',
            showocean=True,
            lakecolor='rgb(255, 255, 255)',
            showlakes=True,
            showcoastlines=True,
            coastlinecolor='rgb(80, 80, 80)',
            coastlinewidth=0.5
        ),
        width=1200,
        height=700,
        margin=dict(l=0, r=0, t=50, b=0)
    )
    
    return fig

# Alternative version using mapbox for more detailed map tiles
def create_mapbox_flow_map(df, mapbox_token=None):
    # Prepare flow data
    flow_data = prepare_region_flow_data(df)
    
    # Calculate total inflows and outflows for each region
    inflows = df.groupby('receiver region')['amount'].sum().reset_index()
    inflows.columns = ['region', 'inflow']
    
    outflows = df.groupby('send region')['amount'].sum().reset_index()
    outflows.columns = ['region', 'outflow']
    
    # Merge inflows and outflows
    region_flows = pd.merge(inflows, outflows, on='region', how='outer').fillna(0)
    
    # Calculate net flow (inflow - outflow)
    region_flows['net_flow'] = region_flows['inflow'] - region_flows['outflow']
    region_flows['total_volume'] = region_flows['inflow'] + region_flows['outflow']
    
    # Create coordinates dataframe for the regions
    region_coords = pd.DataFrame(region_coordinates).T.reset_index()
    region_coords.columns = ['region', 'lat', 'lon']
    
    # Merge coordinates with flow data
    region_data = pd.merge(region_flows, region_coords, on='region', how='left')
    
    # Normalize flow values for better visualization
    max_amount = flow_data['amount'].max()
    flow_data['normalized_amount'] = flow_data['amount'] / max_amount * 10
    
    # Create figure with mapbox background
    fig = go.Figure()
    
    # Add flows as custom shapes (arcs between regions)
    for _, row in flow_data.iterrows():
        # Skip flows below threshold to reduce clutter
        if row['normalized_amount'] < 0.5:
            continue
        
        # Calculate midpoint for the curve (with slight offset for visualization)
        lon_diff = row['target_lon'] - row['source_lon']
        lat_diff = row['target_lat'] - row['source_lat']
        
        # Adjust midpoint upward based on distance
        distance = np.sqrt(lon_diff**2 + lat_diff**2)
        midpoint_offset = min(distance * 0.15, 10)  # Cap the offset
        
        # Generate curve points
        curve_points = []
        steps = 20
        for i in range(steps + 1):
            t = i / steps
            # Parametric curve formula
            x = (1-t)**2 * row['source_lon'] + 2*(1-t)*t * ((row['source_lon'] + row['target_lon'])/2) + t**2 * row['target_lon']
            y = (1-t)**2 * row['source_lat'] + 2*(1-t)*t * ((row['source_lat'] + row['target_lat'])/2 + midpoint_offset) + t**2 * row['target_lat']
            curve_points.append((x, y))
        
        # Extract lons and lats from points
        lons, lats = zip(*curve_points)
        
        # Create curve as a line
        fig.add_trace(go.Scattermapbox(
            lon=lons,
            lat=lats,
            mode='lines',
            line=dict(
                width=row['normalized_amount'] * 1.5,
                color=f'rgba(70, 130, 180, {min(0.8, 0.3 + row["normalized_amount"] * 0.05)})'
            ),
            hoverinfo='text',
            hovertext=f"{row['send region']} → {row['receiver region']}<br>Amount: ${row['amount']:,.2f}",
            showlegend=False
        ))
    
    # Add regions as points
    fig.add_trace(go.Scattermapbox(
        lon=region_data['lon'],
        lat=region_data['lat'],
        text=region_data['region'],
        customdata=np.dstack((
            region_data['inflow'].round(2),
            region_data['outflow'].round(2),
            region_data['net_flow'].round(2)
        ))[0],
        hovertemplate='<b>%{text}</b><br>Inflow: $%{customdata[0]:,.2f}<br>Outflow: $%{customdata[1]:,.2f}<br>Net Flow: $%{customdata[2]:,.2f}',
        mode='markers',
        marker=dict(
            size=region_data['total_volume'] / region_data['total_volume'].max() * 25 + 10,
            color=region_data['net_flow'],
            colorscale='RdBu',
            colorbar=dict(
                title='Net Flow<br>(Inflow - Outflow)',
                thickness=15
            ),
            cmid=0,  # Center colorscale at 0
            opacity=0.8
        ),
        name='Regions'
    ))
    
    # Set mapbox style
    mapbox = dict(
        style='carto-positron',  # Use Carto base map (no token needed)
        zoom=1.2,
        center=dict(lat=20, lon=0)
    )
    
    # If mapbox token is provided, use mapbox style
    if mapbox_token:
        mapbox['accesstoken'] = mapbox_token
        mapbox['style'] = 'mapbox://styles/mapbox/light-v10'
    
    # Update layout
    fig.update_layout(
        title='Global Banking Transaction Flows Between Regions',
        mapbox=mapbox,
        width=1200,
        height=700,
        margin=dict(l=0, r=0, t=50, b=0)
    )
    
    return fig

# Example usage
def visualize_banking_flows_on_map(df):
    # Create version with natural earth projection
    import plotly.io as pio
    pio.renderers.default = 'browser'
    geo_fig = create_region_flow_map(df)
    geo_fig.write_html("banking_flows_map.html")
    
    # Create version with mapbox tiles (no token required for carto base maps)
    mapbox_fig = create_mapbox_flow_map(df)
    mapbox_fig.write_html("banking_flows_mapbox.html")
    
    print("Map visualizations created and saved as HTML files:")
    print("- banking_flows_map.html")
    print("- banking_flows_mapbox.html")
    
    # Return the mapbox figure as default
    return mapbox_fig

# Run the visualization with our dataframe
flow_map = visualize_banking_flows_on_map(df)
flow_map 


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



Map visualizations created and saved as HTML files:
- banking_flows_map.html
- banking_flows_mapbox.html
