# Notebook Configuration

In [13]:
# General Configuration Variables
DATASET_RESULT_DIR = "synthetic-dataset/data"
TOPOLOGIES_RESULT_DIR = "synthetic-dataset/synthetic-topologies"
# DEVICES_DATASET_PATH = "eua-dataset/edge-servers/site-optus-melbCBD.csv"
DEVICES_DATASET_PATH = "eua-dataset/edge-servers/site.csv"
VENDORS_TO_CONSIDER = ["Telstra", "Optus", "Vodafone", "Telecom", "Macquarie"]

# Synthetic Dataset Generation

In [None]:
import os

if not os.path.exists(DATASET_RESULT_DIR):
  os.makedirs(DATASET_RESULT_DIR)


if not os.path.exists(TOPOLOGIES_RESULT_DIR):
  os.makedirs(TOPOLOGIES_RESULT_DIR)

In [5]:
import pandas as pd

def load_devices_dataframe(path: str) -> pd.DataFrame:
    """
    Reads the CSV and returns a DataFrame with the required columns.
    """
    df = pd.read_csv(path)
    
    # Rename columns for consistency
    df.rename(
        columns={
            "SITE_ID": "device_id",
            "LATITUDE": "latitude",
            "LONGITUDE": "longitude",
            "NAME": "name",
            "STATE": "state",
            "LICENSING_AREA_ID": "licensing_area_id",
            "POSTCODE": "postcode",
            "SITE_PRECISION": "site_precision",
            "ELEVATION": "elevation",
            "HCIS_L2": "hcis_l2",
        },
        inplace=True,
    )
    
    # Set device_id as index
    df.set_index("device_id", inplace=True, drop=False)
    
    # Remove unnecessary columns if any exist
    df = df[
        [
            "name",
            "latitude",
            "longitude",
            "elevation",
        ]
    ]
    
    return df
  
devices_df = load_devices_dataframe(DEVICES_DATASET_PATH)

print("Dataset size:", len(devices_df))
devices_df.head()

Dataset size: 95562


Unnamed: 0_level_0,name,latitude,longitude,elevation
device_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1000,Fort Hill Wharf DARWIN,-12.471947,130.845073,
10000,Cnr Castlereagh & Lethbri PENRITH,-33.756158,150.698182,
10000002,Optus 50m Lattice Tower 71 Eastward Road Utakarra,-28.77766,114.63426,
10000003,6 Knuckey Street Darwin,-12.464597,130.840708,
10000004,Cape Wickham Links Clubhouse KING ISLAND,-39.5964,143.9339,


In [6]:
import re

# Filter devices by vendor names
pattern = "|".join(re.escape(v) for v in VENDORS_TO_CONSIDER)
mask = devices_df["name"].str.contains(pattern, case=False, na=False)
devices_df = devices_df.loc[mask].copy()

# Extract and standardize provider names
devices_df["provider"] = (
  devices_df["name"]
  .str.extract(f"({pattern})", flags=re.IGNORECASE)[0]
  .str.upper()
)

# Drop the original name column
devices_df.drop(columns=["name"], inplace=True)

print("Total devices after filtering:", len(devices_df))
devices_df

Total devices after filtering: 18822


Unnamed: 0_level_0,latitude,longitude,elevation,provider
device_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10000002,-28.777660,114.634260,,OPTUS
100001,-38.248652,144.605442,23.0,TELSTRA
10000114,-31.901910,152.533540,,OPTUS
100002,-37.728550,145.222007,116.0,OPTUS
10000215,-32.981570,121.644400,,TELSTRA
...,...,...,...,...
9954,-34.819950,147.902049,714.0,TELSTRA
9958,-34.971752,147.998115,709.0,TELSTRA
9967,-36.130494,144.750901,100.0,TELSTRA
9980,-33.754568,150.716963,48.0,OPTUS


In [None]:
devices_df.to_csv(os.path.join(DATASET_RESULT_DIR, "devices.csv"))

In [8]:
import numpy as np
from typing import Dict, List, Tuple, Optional

def assign_device_resources(
    df: pd.DataFrame,
    config: Optional[Dict] = None
) -> pd.DataFrame:
    """
    Assigns available resources to each device intelligently and realistically.
    
    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with devices
    config : Dict, optional
        Configuration for resource assignment. Structure:
        {
            'global': {
                'group_percentages': {1: 33.0, 2: 33.0, 3: 34.0},  # Percentage of devices in each group
                'group_ranges': {1: (0, 33), 2: (33, 66), 3: (66, 100)}  # % range for each group
            },
            'attributes': {
                'available_RAM': {
                    'min': 1,
                    'max': 128,
                    'local_distribution': {
                        1: [(60, 0, 60), (40, 60, 100)],  # Group 1: (% devices, % min range, % max range)
                        2: [(50, 0, 50), (50, 50, 100)],
                        3: [(30, 0, 40), (70, 40, 100)]
                    }
                },
                'available_Storage': {
                    'min': 10,
                    'max': 2000
                },
                'available_vCPU': {
                    'min': 1,
                    'max': 64
                },
                'GPU_available': {
                    'group_percentages': {1: 10, 2: 30, 3: 60}  # % of true per group
                },
                'TPU_available': {
                    'group_percentages': {1: 5, 2: 15, 3: 40}
                }
            }
        }
    
    Returns:
    --------
    pd.DataFrame with resource columns added
    """
    
    # Default configuration
    default_config = {
        'global': {
            'group_percentages': {1: 33.33, 2: 33.33, 3: 33.34},
            'group_ranges': {1: (0, 33), 2: (33, 66), 3: (66, 100)}
        },
        'attributes': {
            'available_RAM': {'min': 1, 'max': 128},
            'available_Storage': {'min': 10, 'max': 2000},
            'available_vCPU': {'min': 1, 'max': 64},
            'GPU_available': {'group_percentages': {1: 10, 2: 30, 3: 60}},
            'TPU_available': {'group_percentages': {1: 5, 2: 15, 3: 40}}
        }
    }
    
    # Use provided configuration or default
    if config is None:
        config = default_config
    else:
        # Merge with default values for unspecified items
        if 'global' not in config:
            config['global'] = default_config['global']
        else:
            if 'group_percentages' not in config['global']:
                config['global']['group_percentages'] = default_config['global']['group_percentages']
            else:
                # Normalize percentages if they don't sum to 100
                configured_groups = config['global']['group_percentages']
                total_pct = sum(configured_groups.values())
                if total_pct < 100:
                    remaining = 100 - total_pct
                    unconfigured = [g for g in [1, 2, 3] if g not in configured_groups]
                    if unconfigured:
                        split = remaining / len(unconfigured)
                        for g in unconfigured:
                            config['global']['group_percentages'][g] = split
            
            if 'group_ranges' not in config['global']:
                config['global']['group_ranges'] = default_config['global']['group_ranges']
        
        if 'attributes' not in config:
            config['attributes'] = default_config['attributes']
        else:
            for attr in default_config['attributes']:
                if attr not in config['attributes']:
                    config['attributes'][attr] = default_config['attributes'][attr]
    
    df_result = df.copy()
    n_devices = len(df_result)
    
    # Assign global groups to each device
    group_percentages = config['global']['group_percentages']
    groups = []
    for group, percentage in sorted(group_percentages.items()):
        count = int(n_devices * percentage / 100)
        groups.extend([group] * count)
    
    # Adjust if there's a rounding difference
    while len(groups) < n_devices:
        groups.append(3)
    groups = groups[:n_devices]
    
    # Shuffle randomly
    np.random.shuffle(groups)
    df_result['global_group'] = groups
    
    # Process integer attributes
    integer_attrs = ['available_RAM', 'available_Storage', 'available_vCPU']
    
    for attr in integer_attrs:
        attr_config = config['attributes'][attr]
        min_val = attr_config['min']
        max_val = attr_config['max']
        
        values = []
        
        for group in [1, 2, 3]:
            # Filter devices in this group
            group_mask = df_result['global_group'] == group
            group_size = group_mask.sum()
            
            if group_size == 0:
                continue
            
            # Get global range for this group
            group_range = config['global']['group_ranges'][group]
            range_min = min_val + (max_val - min_val) * group_range[0] / 100
            range_max = min_val + (max_val - min_val) * group_range[1] / 100
            
            # Check if there's a configured local distribution
            if 'local_distribution' in attr_config and group in attr_config['local_distribution']:
                local_dist = attr_config['local_distribution'][group]
                group_values = []
                
                for pct_devices, pct_min, pct_max in local_dist:
                    count = int(group_size * pct_devices / 100)
                    sub_min = range_min + (range_max - range_min) * pct_min / 100
                    sub_max = range_min + (range_max - range_min) * pct_max / 100
                    
                    sub_values = np.random.uniform(sub_min, sub_max, count)
                    group_values.extend(sub_values)
                
                # Adjust if there's a rounding difference
                while len(group_values) < group_size:
                    group_values.append(np.random.uniform(range_min, range_max))
                group_values = group_values[:group_size]
                
            else:
                # Uniform random generation
                group_values = np.random.uniform(range_min, range_max, group_size)
            
            values.extend(group_values)
        
        # Convert to integers and assign
        df_result[attr] = np.array(values, dtype=int)
    
    # Process boolean attributes
    boolean_attrs = ['GPU_available', 'TPU_available']
    
    for attr in boolean_attrs:
        attr_config = config['attributes'][attr]
        values = []
        
        for group in [1, 2, 3]:
            group_mask = df_result['global_group'] == group
            group_size = group_mask.sum()
            
            if group_size == 0:
                continue
            
            # Get percentage of True for this group
            if 'group_percentages' in attr_config and group in attr_config['group_percentages']:
                true_pct = attr_config['group_percentages'][group]
            else:
                true_pct = 50  # Default 50%
            
            # Generate boolean values
            true_count = int(group_size * true_pct / 100)
            group_values = [True] * true_count + [False] * (group_size - true_count)
            np.random.shuffle(group_values)
            
            values.extend(group_values)
        
        df_result[attr] = values
    
    return df_result

In [9]:
# Example usage with custom configuration
custom_config = {
    'global': {
        'group_percentages': {1: 40, 2: 35, 3: 25},  # More devices in group 1 (low capacity)
        'group_ranges': {1: (0, 33), 2: (33, 66), 3: (66, 100)}
    },
    'attributes': {
        'available_RAM': {
            'min': 2,
            'max': 128,
            'local_distribution': {
                1: [(70, 0, 50), (30, 50, 100)],  # Group 1: majority in low range
                2: [(50, 20, 70), (50, 70, 100)], # Group 2: medium distribution
                3: [(20, 0, 40), (80, 40, 100)]   # Group 3: majority in high range
            }
        },
        'available_Storage': {
            'min': 20,
            'max': 2000,
            'local_distribution': {
                1: [(80, 0, 60), (20, 60, 100)],
                2: [(40, 0, 50), (60, 50, 100)],
                3: [(30, 0, 50), (70, 50, 100)]
            }
        },
        'available_vCPU': {
            'min': 1,
            'max': 64
        },
        'GPU_available': {
            'group_percentages': {1: 5, 2: 25, 3: 70}  # More GPUs on powerful devices
        },
        'TPU_available': {
            'group_percentages': {1: 2, 2: 10, 3: 50}  # TPUs mainly in group 3
        }
    }
}

# Apply the function
devices_df = assign_device_resources(devices_df, custom_config)

print(f"\nDevices per group:")
print(devices_df['global_group'].value_counts().sort_index())
print(f"\nResource statistics:")
print(devices_df[['available_RAM', 'available_Storage', 'available_vCPU']].describe())
print(f"\nAccelerator availability:")
print(f"GPU_available: {devices_df['GPU_available'].sum()} ({devices_df['GPU_available'].sum()/len(devices_df)*100:.1f}%)")
print(f"TPU_available: {devices_df['TPU_available'].sum()} ({devices_df['TPU_available'].sum()/len(devices_df)*100:.1f}%)")

devices_df.head(10)


Devices per group:
global_group
1    7528
2    6587
3    4707
Name: count, dtype: int64

Resource statistics:
       available_RAM  available_Storage  available_vCPU
count   18822.000000       18822.000000    18822.000000
mean       59.396132         905.199713       28.638508
std        38.501338         601.979404       17.617431
min         2.000000          20.000000        1.000000
25%        20.000000         326.000000       13.000000
50%        63.000000         909.000000       27.000000
75%        85.000000        1326.000000       42.000000
max       127.000000        1999.000000       63.000000

Accelerator availability:
GPU_available: 5316 (28.2%)
TPU_available: 3161 (16.8%)


Unnamed: 0_level_0,latitude,longitude,elevation,provider,global_group,available_RAM,available_Storage,available_vCPU,GPU_available,TPU_available
device_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10000002,-28.77766,114.63426,,OPTUS,1,14,123,8,False,True
100001,-38.248652,144.605442,23.0,TELSTRA,2,2,405,3,False,False
10000114,-31.90191,152.53354,,OPTUS,3,6,332,6,False,False
100002,-37.72855,145.222007,116.0,OPTUS,1,5,162,2,False,False
10000215,-32.98157,121.6444,,TELSTRA,2,8,304,16,False,False
10000216,-35.02197,149.87236,,TELSTRA,3,3,283,8,False,False
10000264,-36.70694,144.31703,,TELSTRA,2,16,119,21,False,False
10000265,-24.182717,151.236237,,TELSTRA,3,10,163,9,False,False
10000272,-26.20426,152.44295,,TELSTRA,1,11,138,6,False,False
10000273,-27.78632,153.36421,,TELSTRA,2,8,236,11,False,False


In [None]:
devices_df.to_csv(os.path.join(DATASET_RESULT_DIR, "devices.csv"))

# Topologies Generator

In [23]:
import uuid
from math import radians, cos, sin, asin, sqrt
import json
import pandas as pd


def haversine(lon1: float, lat1: float, lon2: float, lat2: float) -> float:
    """
    Calculate the great circle (horizontal) distance between two points
    on the earth (specified in decimal degrees). Returns distance in meters.
    """
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    c = 2 * asin(sqrt(a))
    km = 6371 * c
    return km * 1000


def distance_3d(lon1: float, lat1: float, lon2: float, lat2: float, elev1: float = 0.0, elev2: float = 0.0) -> float:
    """
    Compute 3D distance between two points considering horizontal haversine distance
    and vertical elevation difference (meters). NaN elevations are treated as 0.
    """
    # Treat NaN elevations as 0
    elev1 = 0.0 if pd.isna(elev1) else float(elev1)
    elev2 = 0.0 if pd.isna(elev2) else float(elev2)
    d_horizontal = haversine(lon1, lat1, lon2, lat2)
    d_vertical = elev2 - elev1
    return (d_horizontal ** 2 + d_vertical ** 2) ** 0.5


def generate_topology(
    lat: float,
    long: float,
    rad: float,
    devices_df: pd.DataFrame,
    number_of_providers: Optional[int] = None,
    allowed_groups: Optional[List[int]] = None,
    number_of_devices: Optional[int] = None,
    center_elevation: float = 0.0
) -> Tuple[pd.DataFrame, str]:
    """
    Generate a sub-dataset of devices within a circular area and an HTML map
    styled similar to the reference map.html (Leaflet + custom UI).

    Elevation-aware distance: uses 3D distance combining horizontal haversine
    and elevation difference (meters). NaN elevations are treated as 0.

    Parameters:
    -----------
    lat : float
        Center latitude
    long : float
        Center longitude
    rad : float
        Radius in meters (applied to 3D distance)
    devices_df : pd.DataFrame
        Full devices dataset
    number_of_providers : int, optional
        Maximum number of providers. If None, unlimited.
    allowed_groups : List[int], optional
        List of allowed groups (1, 2, 3). If None, defaults to [1, 2, 3].
    number_of_devices : int, optional
        Number of devices to select. If None, select all within area.
    center_elevation : float, optional
        Elevation (meters) at center point. Defaults to 0.0

    Returns:
    --------
    Tuple[pd.DataFrame, str]
        - DataFrame with selected devices
        - UUID string of the topology
    """

    if allowed_groups is None:
        allowed_groups = [1, 2, 3]

    df_work = devices_df.copy()

    # Filter by allowed groups
    df_work = df_work[df_work['global_group'].isin(allowed_groups)]

    # Calculate 3D distance from center for each device
    df_work['distance_to_center'] = df_work.apply(
        lambda row: distance_3d(
            long,
            lat,
            row['longitude'],
            row['latitude'],
            center_elevation,
            row['elevation']
        ),
        axis=1
    )

    # Filter devices within radius (3D distance)
    df_work = df_work[df_work['distance_to_center'] <= rad]

    # Filter by provider constraint if specified
    if number_of_providers is not None:
        top_providers = df_work['provider'].value_counts().head(number_of_providers).index.tolist()
        df_work = df_work[df_work['provider'].isin(top_providers)]

    # Select number_of_devices random devices if specified
    if number_of_devices is not None and len(df_work) > number_of_devices:
        df_work = df_work.sample(n=number_of_devices, random_state=None)
    elif number_of_devices is not None and len(df_work) < number_of_devices:
        print(f"Warning: Only {len(df_work)} devices found, but {number_of_devices} were requested.")

    # Remove the temporary distance column
    df_work = df_work.drop(columns=['distance_to_center'])

    # Generate UUID for this topology
    topology_id = str(uuid.uuid4())

    # Create topology directory under configured TOPOLOGIES_RESULT_DIR
    topology_dir = os.path.join(TOPOLOGIES_RESULT_DIR, topology_id)
    os.makedirs(topology_dir, exist_ok=True)

    # Save CSV
    csv_path = os.path.join(topology_dir, "devices.csv")
    df_work.to_csv(csv_path)

    # Prepare provider colors (matching reference style palette)
    base_colors = {
        'OPTUS': '#ff4444',
        'TELSTRA': '#ff6666',
        'MACQUARIE': '#ff8888',
        'TELECOM': '#ffaa99',
        'VODAFONE': '#ffccbb'
    }
    unique_providers = df_work['provider'].unique().tolist()
    provider_colors = {}
    fallback_palette = ['#667eea', '#764ba2', '#48bb78', '#ed8936', '#e53e3e', '#3182ce']
    for i, p in enumerate(unique_providers):
        provider_colors[p] = base_colors.get(p, fallback_palette[i % len(fallback_palette)])

    # Build devices payload for embedding in HTML
    devices_payload = []
    for idx, row in df_work.iterrows():
        devices_payload.append({
            'device_id': str(idx),
            'latitude': float(row['latitude']),
            'longitude': float(row['longitude']),
            'provider': str(row['provider']),
            'global_group': int(row['global_group']),
            'available_RAM': int(row['available_RAM']),
            'available_Storage': int(row['available_Storage']),
            'available_vCPU': int(row['available_vCPU'])
        })

    # Legend HTML for providers
    legend_items_html = "\n".join([
        f"            <div class=\"legend-item\">\n                <div class=\"legend-color\" style=\"background-color: {provider_colors[p]};\"></div>\n                <span>{p}</span>\n            </div>"
        for p in unique_providers
    ])

    # Provider filter options
    provider_options_html = "\n".join([
        "                    <option value=\"\">All</option>"
    ] + [
        f"                    <option value=\"{p}\">{p}</option>"
        for p in unique_providers
    ])

    # Construct HTML content matching reference design
    html_content = f"""<!DOCTYPE html>
<html lang=\"en\">
<head>
    <meta charset=\"UTF-8\">
    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">
    <title>Device Topology Map</title>
    <link rel=\"stylesheet\" href=\"https://cdnjs.cloudflare.com/ajax/libs/leaflet/1.9.4/leaflet.min.css\" />
    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/leaflet/1.9.4/leaflet.min.js\"></script>
    <style>
        * {{
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }}
        body {{
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background-color: #f5f5f5;
        }}
        .container {{
            display: flex;
            flex-direction: column;
            height: 100vh;
        }}
        .header {{
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 20px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            z-index: 10;
        }}
        .header h1 {{
            font-size: 24px;
            margin-bottom: 8px;
        }}
        .header p {{
            font-size: 14px;
            opacity: 0.9;
        }}
        .controls {{
            background: white;
            padding: 15px 20px;
            border-bottom: 1px solid #e0e0e0;
            display: flex;
            gap: 15px;
            align-items: center;
            flex-wrap: wrap;
            box-shadow: 0 1px 3px rgba(0,0,0,0.05);
            z-index: 9;
        }}
        .control-group {{
            display: flex;
            align-items: center;
            gap: 8px;
        }}
        .control-group label {{
            font-weight: 500;
            font-size: 14px;
            color: #333;
        }}
        .control-group select,
        .control-group input {{
            padding: 8px 12px;
            border: 1px solid #ddd;
            border-radius: 4px;
            font-size: 14px;
            background: white;
            cursor: pointer;
            transition: border-color 0.2s;
        }}
        .control-group select:hover,
        .control-group input:hover {{
            border-color: #667eea;
        }}
        .control-group select:focus,
        .control-group input:focus {{
            outline: none;
            border-color: #667eea;
            box-shadow: 0 0 5px rgba(102, 126, 234, 0.2);
        }}
        .stats {{
            display: flex;
            gap: 20px;
            font-size: 14px;
            color: #666;
            margin-left: auto;
        }}
        .stats span {{
            font-weight: 500;
        }}
        #map {{
            flex: 1;
            position: relative;
        }}
        .legend {{
            background: white;
            padding: 12px 16px;
            border-radius: 4px;
            box-shadow: 0 2px 6px rgba(0,0,0,0.1);
            position: absolute;
            bottom: 20px;
            right: 20px;
            z-index: 1000;
            font-size: 12px;
            max-width: 200px;
        }}
        .legend h4 {{
            margin-bottom: 8px;
            font-size: 13px;
            font-weight: 600;
        }}
        .legend-item {{
            display: flex;
            align-items: center;
            gap: 8px;
            margin-bottom: 6px;
        }}
        .legend-color {{
            width: 12px;
            height: 12px;
            border-radius: 50%;
            border: 1px solid white;
            box-shadow: 0 0 3px rgba(0,0,0,0.2);
        }}
        .leaflet-popup-content {{
            max-height: 400px;
            overflow-y: auto;
            font-size: 12px;
        }}
        .popup-table {{
            width: 100%;
            border-collapse: collapse;
        }}
        .popup-table tr {{
            border-bottom: 1px solid #eee;
        }}
        .popup-table td {{
            padding: 6px;
        }}
        .popup-table td:first-child {{
            font-weight: 600;
            color: #667eea;
            width: 40%;
            word-break: break-word;
        }}
        .popup-table td:last-child {{
            text-align: right;
            word-break: break-word;
        }}
    </style>
</head>
<body>
    <div class=\"container\">
        <div class=\"header\">
            <h1>üó∫Ô∏è Device Topology</h1>
            <p>Geographic location of devices with dynamic rendering</p>
        </div>
        <div class=\"controls\">
            <div class=\"control-group\">
                <label for=\"providerFilter\">Filter by provider:</label>
                <select id=\"providerFilter\">
{provider_options_html}
                </select>
            </div>
            <div class=\"control-group\">
                <label for=\"searchDevice\">Search device:</label>
                <input type=\"text\" id=\"searchDevice\" placeholder=\"E.g: device id\">
            </div>
            <div class=\"stats\">
                <span id=\"deviceCount\">Total: {len(devices_payload)}</span>
                <span id=\"visibleCount\">Visible: 0</span>
            </div>
        </div>
        <div id=\"map\"></div>
        <div class=\"legend\">
            <h4>Providers</h4>
{legend_items_html}
        </div>
    </div>
    <script>
        const devices = {json.dumps(devices_payload)};
        const providerColors = {json.dumps(provider_colors)};
        const centerLat = {lat};
        const centerLong = {long};
        const radiusMeters = {rad};

        // Initialize map
        const map = L.map('map').setView([centerLat, centerLong], 12);
        L.tileLayer('https://{{s}}.tile.openstreetmap.org/{{z}}/{{x}}/{{y}}.png', {{
            maxZoom: 19,
            attribution: '&copy; OpenStreetMap contributors'
        }}).addTo(map);

        // Draw search radius circle
        const radiusCircle = L.circle([centerLat, centerLong], {{
            radius: radiusMeters,
            color: 'blue',
            fillColor: 'blue',
            fillOpacity: 0.1
        }}).addTo(map);
        
        // Center marker
        const centerMarker = L.marker([centerLat, centerLong]).addTo(map);
        centerMarker.bindPopup('Center');

        // Create markers
        const markers = [];
        const markerLayer = L.layerGroup().addTo(map);

        function devicePopupHtml(d) {{
            return `
            <table class="popup-table">
                <tr><td>Device ID</td><td>${{d.device_id}}</td></tr>
                <tr><td>Provider</td><td>${{d.provider}}</td></tr>
                <tr><td>Group</td><td>${{d.global_group}}</td></tr>
                <tr><td>RAM</td><td>${{d.available_RAM}} GB</td></tr>
                <tr><td>Storage</td><td>${{d.available_Storage}} GB</td></tr>
                <tr><td>vCPU</td><td>${{d.available_vCPU}}</td></tr>
                <tr><td>Latitude</td><td>${{d.latitude.toFixed(6)}}</td></tr>
                <tr><td>Longitude</td><td>${{d.longitude.toFixed(6)}}</td></tr>
            </table>`;
        }}

        function renderMarkers(providerFilterValue) {{
            markerLayer.clearLayers();
            let visible = 0;
            devices.forEach(d => {{
                if (providerFilterValue && d.provider !== providerFilterValue) return;
                const color = providerColors[d.provider] || '#3182ce';
                const m = L.circleMarker([d.latitude, d.longitude], {{
                    radius: 6,
                    color: color,
                    fillColor: color,
                    fillOpacity: 0.9,
                    weight: 1
                }});
                m.bindPopup(devicePopupHtml(d));
                m.addTo(markerLayer);
                markers.push(m);
                visible += 1;
            }});
            document.getElementById('visibleCount').innerText = `Visible: ${{visible}}`;
        }}

        // Initial render
        renderMarkers('');

        // Provider filter logic
        const providerFilter = document.getElementById('providerFilter');
        providerFilter.addEventListener('change', (e) => {{
            renderMarkers(e.target.value);
        }});

        // Search device logic
        const searchInput = document.getElementById('searchDevice');
        searchInput.addEventListener('keydown', (e) => {{
            if (e.key === 'Enter') {{
                const val = e.target.value.trim();
                const found = devices.find(d => d.device_id === val);
                if (found) {{
                    map.setView([found.latitude, found.longitude], 15);
                    const color = providerColors[found.provider] || '#3182ce';
                    const tempMarker = L.circleMarker([found.latitude, found.longitude], {{
                        radius: 8,
                        color: color,
                        fillColor: color,
                        fillOpacity: 0.9,
                        weight: 2
                    }}).addTo(map);
                    tempMarker.bindPopup(devicePopupHtml(found)).openPopup();
                    setTimeout(() => map.removeLayer(tempMarker), 5000);
                }}
            }}
        }});
    </script>
</body>
</html>
"""

    # Save HTML
    html_path = os.path.join(topology_dir, "map.html")
    with open(html_path, 'w', encoding='utf-8') as f:
        f.write(html_content)

    # Save topology metadata
    metadata = {
        'topology_id': topology_id,
        'center_lat': lat,
        'center_long': long,
        'radius_meters': rad,
        'num_devices': len(df_work),
        'allowed_groups': allowed_groups,
        'max_providers': number_of_providers,
        'requested_devices': number_of_devices,
        'providers_in_topology': unique_providers,
        'num_providers': len(unique_providers)
    }

    metadata_path = os.path.join(topology_dir, "metadata.json")
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=2)

    print(f"Topology {topology_id} generated successfully!")
    print(f"  - Devices selected: {len(df_work)}")
    print(f"  - Providers: {len(unique_providers)} ({', '.join(unique_providers)})")
    print(f"  - Groups: {sorted(df_work['global_group'].unique().tolist())}")
    print(f"  - Files saved in: {topology_dir}")

    return df_work, topology_id

In [24]:
# Example: Generate topology for Melbourne CBD
# Parameters:
# - Center: Melbourne CBD (-37.8136, 144.9631)
# - Radius: 5000 meters (5 km)
# - Max providers: 3
# - Allowed groups: [2, 3] (medium and high capacity devices)
# - Number of devices: 50

topology_devices, topology_id = generate_topology(
    lat=-37.8136,
    long=144.9631,
    rad=5000,
    devices_df=devices_df,
    number_of_providers=3,
    allowed_groups=[2, 3],
    number_of_devices=50
)

print(f"\nTopology ID: {topology_id}")
print(f"\nTopology devices summary:")
print(topology_devices[['provider', 'global_group', 'available_RAM', 'available_Storage', 'available_vCPU']].describe())

Topology e9e51e32-68e6-4ed6-b171-ef902d555c4c generated successfully!
  - Devices selected: 50
  - Providers: 3 (TELSTRA, VODAFONE, OPTUS)
  - Groups: [2, 3]
  - Files saved in: synthetic-dataset/synthetic-topologies/e9e51e32-68e6-4ed6-b171-ef902d555c4c

Topology ID: e9e51e32-68e6-4ed6-b171-ef902d555c4c

Topology devices summary:
       global_group  available_RAM  available_Storage  available_vCPU
count     50.000000      50.000000          50.000000       50.000000
mean       2.400000      61.660000         876.900000       29.060000
std        0.494872      31.977422         470.102822       14.885152
min        2.000000       2.000000          96.000000        1.000000
25%        2.000000      54.000000         718.750000       21.250000
50%        2.000000      63.500000         848.000000       27.500000
75%        3.000000      75.750000        1016.500000       38.750000
max        3.000000     122.000000        1943.000000       62.000000


# Pricing Generator

In [39]:
import yaml
from datetime import date

def generate_pricing_from_topology(
    topology_id: str,
    compatible_provider_groups: Optional[List[List[str]]] = None
) -> str:
    """
    Generate a Pricing2Yaml representation of a topology and save it to a YAML file.
    
    Parameters:
    -----------
    topology_id : str
        UUID of the topology to generate pricing for
    compatible_provider_groups : List[List[str]], optional
        List of provider groups that can be used together. If None, devices are
        only compatible within their own provider. Example: [["OPTUS", "TELSTRA"], ["VODAFONE"]]
        means OPTUS and TELSTRA devices can be used together, but not with VODAFONE devices.
    
    Returns:
    --------
    str
        Path to the generated YAML file
    """
    
    # Build topology directory path
    topology_dir = os.path.join(TOPOLOGIES_RESULT_DIR, topology_id)
    
    if not os.path.exists(topology_dir):
        raise FileNotFoundError(f"Topology directory not found: {topology_dir}")
    
    # Load the devices CSV for this topology
    devices_csv_path = os.path.join(topology_dir, "devices.csv")
    devices_df = pd.read_csv(devices_csv_path, index_col=0)
    
    # Load metadata
    metadata_path = os.path.join(topology_dir, "metadata.json")
    with open(metadata_path, 'r') as f:
        metadata = json.load(f)
    
    # Build compatibility map based on provider groups
    # If compatible_provider_groups is None, each device only works with its own provider
    provider_compatibility = {}
    if compatible_provider_groups is None:
        # Default: each provider only works with itself
        for provider in metadata['providers_in_topology']:
            provider_compatibility[provider] = [provider]
    else:
        # Map each provider to its compatible group
        for group in compatible_provider_groups:
            for provider in group:
                provider_compatibility[provider] = group
    
    # Precompute add-on identifiers per device for reuse
    addon_ids = {
        idx: f"{row['provider']}_{idx}" for idx, row in devices_df.iterrows()
    }
    
    # Build the Pricing2Yaml structure
    pricing = {
        'saasName': topology_id,
        'syntaxVersion': '3.0',
        'version': '1.0.0',
        'createdAt': date.today().isoformat(),
        'features': {
            'gpu_available': {
                'type': 'BOOLEAN',
                'defaultValue': False
            },
            'tpu_available': {
                'type': 'BOOLEAN',
                'defaultValue': False
            },
            'deployment': {
                'type': 'BOOLEAN',
                'defaultValue': True
            }
        },
        'usageLimits': {
            'available_ram': {
                'unit': 'GB',
                'type': 'NON_RENEWABLE',
                'defaultValue': 0,
            },
            'available_storage': {
                'unit': 'GB',
                'type': 'NON_RENEWABLE',
                'defaultValue': 0,
            },
            'available_vcpu': {
                'unit': 'vCPU',
                'type': 'NON_RENEWABLE',
                'defaultValue': 0,
            }
        },
        'addOns': {}
    }
    
    # Create an add-on for each device
    for idx, row in devices_df.iterrows():
        device_id = str(idx)
        provider = row['provider']
        provider_device_id = addon_ids[idx]
        
        # Determine compatible devices (same group as this provider)
        compatible_providers = provider_compatibility.get(provider, [provider])
        
        # Find all add-on IDs that belong to compatible providers
        compatible_addon_ids = []
        for other_idx, other_row in devices_df.iterrows():
            other_provider = other_row['provider']
            if other_provider in compatible_providers and other_idx != idx:
                compatible_addon_ids.append(addon_ids[other_idx])
        
        # Find all add-on IDs that should be excluded (not compatible)
        all_addon_ids = set(addon_ids.values())
        excluded_device_ids = list(all_addon_ids - set(compatible_addon_ids) - {provider_device_id})
        
        # Create the add-on
        addon = {
            'features': {
                'gpu_available': {
                  'value': bool(row['GPU_available'])
                },
                'tpu_available': {
                  'value': bool(row['TPU_available'])
                },
                'deployment': {
                  'value': True
                },
            },
            'usageLimits': {
                'available_ram': {
                  'value': int(row['available_RAM'])
                },
                'available_storage': {
                  'value': int(row['available_Storage'])
                },
                'available_vcpu': {
                  'value': int(row['available_vCPU'])
                },
            },
            'excludes': excluded_device_ids if excluded_device_ids else []
        }
        
        # Use provider and device info in the add-on ID
        addon_id = provider_device_id
        pricing['addOns'][addon_id] = addon
    
    # Save to YAML file
    yaml_path = os.path.join(topology_dir, "pricing.yml")
    with open(yaml_path, 'w', encoding='utf-8') as f:
        yaml.dump(
            pricing,
            f,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=False
        )
    
    print(f"Pricing YAML generated successfully!")
    print(f"  - Topology ID: {topology_id}")
    print(f"  - Devices: {len(devices_df)}")
    print(f"  - Providers: {metadata['providers_in_topology']}")
    print(f"  - File saved: {yaml_path}")
    
    return yaml_path

In [40]:
# Example 1: Generate pricing without provider compatibility (default: each provider isolated)
print("=" * 80)
print("Example 1: Pricing with provider isolation")
print("=" * 80)
pricing_path_1 = generate_pricing_from_topology(topology_id)

# Example 2: Generate pricing with compatible provider groups
# print("\n" + "=" * 80)
# print("Example 2: Pricing with provider groups (OPTUS+TELSTRA compatible)")
# print("=" * 80)
# compatible_groups = [["OPTUS", "TELSTRA"], ["VODAFONE"]]
# pricing_path_2 = generate_pricing_from_topology(topology_id, compatible_provider_groups=compatible_groups)

Example 1: Pricing with provider isolation
Pricing YAML generated successfully!
  - Topology ID: e9e51e32-68e6-4ed6-b171-ef902d555c4c
  - Devices: 50
  - Providers: ['TELSTRA', 'VODAFONE', 'OPTUS']
  - File saved: synthetic-dataset/synthetic-topologies/e9e51e32-68e6-4ed6-b171-ef902d555c4c/pricing.yml
Pricing YAML generated successfully!
  - Topology ID: e9e51e32-68e6-4ed6-b171-ef902d555c4c
  - Devices: 50
  - Providers: ['TELSTRA', 'VODAFONE', 'OPTUS']
  - File saved: synthetic-dataset/synthetic-topologies/e9e51e32-68e6-4ed6-b171-ef902d555c4c/pricing.yml


# Problem Resolution

In [17]:
# ----- FIRST ITERATION OF THE PROBLEM -----

# Location of users
  # Either as coordinates (a point)
    # Latitude
    # Longitude
  # or in zones (a geographic area)
# Providers to consider
# Maximum budget (mandatory)
# Maximum number of devices (if not set, max = total available devices)
# Minimum resources (if not set, all resources set to 0)
# Maximum distance to users (if not set, max = infinite; declared in meters)

# ----- BACKLOG OF THE PROBLEM -----

# Maximum number of providers
# Maximum distance between the different nodes of S
# Set of services to host
  # Requirements of each service
  # Distance constraints between services