# End-to-End Flow for Mobility Robustness Optimization (MRO)

This notebook outlines the flow from loading the initial data to executing the `perform_attachment_hyst_ttt` function for MRO with hysteresis and time-to-trigger.

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import math

## Load Data

In [19]:
# Ensure that 'UE_data_20UE_100ticks.csv' and 'topology.csv'
# are present in the './Data/MRO/' directory.
ue_data = pd.read_csv('./Data/MRO/UE_data_20UE_100ticks.csv')
topology = pd.read_csv('./Data/MRO/topology.csv')
ue_data = ue_data.rename(columns={'mock_ue_id': 'ue_id'})

# Apply the same topology modifications as in the original notebook
topology.loc[topology['cell_id'] == 'cell_2', 'cell_lat'] = 0
topology.loc[topology['cell_id'] == 'cell_3', 'cell_lat'] = 90
topology.loc[topology['cell_id'] == 'cell_1', 'cell_lat'] = -90
topology.loc[topology['cell_id'] == 'cell_2', 'cell_lon'] = 0
topology.loc[topology['cell_id'] == 'cell_3', 'cell_lon'] = 180
topology.loc[topology['cell_id'] == 'cell_1', 'cell_lon'] = -180
topology.loc[topology['cell_id'] == 'cell_2', 'cell_carrier_freq_mhz'] = 2100
topology.loc[topology['cell_id'] == 'cell_3', 'cell_carrier_freq_mhz'] = 2100
topology.loc[topology['cell_id'] == 'cell_1', 'cell_carrier_freq_mhz'] = 2100

## Define Helper Functions

In [22]:
RADIUS_EARTH_EQUATOR_KM = 6378.137
CIRC_KM_TO_DEG_LAT: float = 180.0 / (math.pi * RADIUS_EARTH_EQUATOR_KM)
LATENT_BACKGROUND_NOISE_DB = -150
from typing import Any, List, Tuple, Union

class GISTools:
    R: float = 6378.1
    OneDegree: float = R * 2 * math.pi / 360 * 1000

    @staticmethod
    def dist(l1: Tuple[float, float], l2: Tuple[float, float], abs_tol: float = 0.0002) -> float:
        if GISTools.isclose(l1, l2, abs_tol=abs_tol):
            return 0.0
        [phi1, lam1] = [math.radians(l1[0]), math.radians(l1[1])]
        [phi2, lam2] = [math.radians(l2[0]), math.radians(l2[1])]
        d = (2 * GISTools.R * math.asin(math.sqrt(math.sin((phi2 - phi1) / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin((lam2 - lam1) / 2) ** 2)))
        return d

    @staticmethod
    def isclose(A: Tuple[float, float], B: Tuple[float, float], abs_tol: float = 0.0002) -> bool:
        try:
            return math.isclose(A[0], B[0], abs_tol=abs_tol) and math.isclose(A[1], B[1], abs_tol=abs_tol)
        except AttributeError:
            return abs(A[0] - B[0]) <= max(1e-9 * max(abs(A[0]), abs(B[0])), abs_tol) and abs(A[1] - B[1]) <= max(1e-9 * max(abs(A[1]), abs(B[1])), abs_tol)

    @staticmethod
    def get_log_distance(lat1, lon1, lat2, lon2, epsilon=1.0):
        return np.log(epsilon + 1000.0 * GISTools.dist((lat1, lon1), (lat2, lon2)))

    @staticmethod
    def get_relative_bearing(cell_az_deg, cell_lat, cell_lon, lat, lon):
        def rel_bearing(heading_0_to_360: float, target_bearing_0_to_360: float) -> float:
            return (target_bearing_0_to_360 - heading_0_to_360) % 360

        def convert_bearing_0_to_360(bearing_minus180_to_180: float) -> float:
            return (bearing_minus180_to_180 + 360) % 360

        def get_bearing(l1: Tuple[float, float], l2: Tuple[float, float]) -> float:
            [phi1, lam1] = [math.radians(l1[0]), math.radians(l1[1])]
            [phi2, lam2] = [math.radians(l2[0]), math.radians(l2[1])]
            y = math.sin(lam2 - lam1) * math.cos(phi2)
            x = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(lam2 - lam1)
            return math.degrees(math.atan2(y, x))

        return rel_bearing(
            cell_az_deg,
            convert_bearing_0_to_360(
                get_bearing(
                    (cell_lat, cell_lon),
                    (lat, lon),
                )
            ),
        )

In [23]:
from typing import Tuple

def connect_ue_to_all_cells(ue_data, topology):
    results = []
    for _, ue_row in ue_data.iterrows():
        ue_lat = ue_row['latitude']
        ue_lon = ue_row['longitude']
        for _, cell_row in topology.iterrows():
            distance = GISTools.dist((ue_lat, ue_lon), (cell_row['cell_lat'], cell_row['cell_lon']))
            combined_data = {
                'ue_id': ue_row['ue_id'],
                'longitude': ue_row['longitude'],
                'latitude': ue_row['latitude'],
                'tick': ue_row['tick'],
                'cell_lat': cell_row['cell_lat'],
                'cell_lon': cell_row['cell_lon'],
                'cell_id': cell_row['cell_id'],
                'cell_az_deg': cell_row['cell_az_deg'],
                'cell_carrier_freq_mhz': cell_row['cell_carrier_freq_mhz'],
                'distance': distance
            }
            results.append(combined_data)
    full_data = pd.DataFrame(results)
    full_data.drop(columns=['distance'], inplace=True)
    return full_data

def haversine(lat1, lon1, lat2, lon2):
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    R = 6371.0
    return R * c

def calculate_received_power(tx_power_dbm, distance_km, frequency_mhz):
    distance_m = distance_km * 1000
    fspl_db = 20 * np.log10(distance_m) + 20 * np.log10(frequency_mhz) - 27.55
    received_power_dbm = tx_power_dbm - fspl_db
    return received_power_dbm

def add_sinr_column(df: pd.DataFrame) -> pd.DataFrame:
    def compute_layer_sinr(group: pd.DataFrame) -> float:
        serving_row = group.loc[group['cell_rxpower_dbm'].idxmax()]
        serving_rsrp_dbm = serving_row['cell_rxpower_dbm']
        noise_linear = 10 ** (LATENT_BACKGROUND_NOISE_DB / 10)
        total_power_linear = np.sum(10 ** (group['cell_rxpower_dbm'] / 10))
        serving_power_linear = 10 ** (serving_rsrp_dbm / 10)
        interference_linear = total_power_linear - serving_power_linear
        total_interference_noise_dbm = 10 * np.log10(interference_linear + noise_linear)
        sinr_db = serving_rsrp_dbm - total_interference_noise_dbm
        return sinr_db

    ue_sinr = {}
    for ue_id, ue_group in df.groupby("ue_id"):
        sinr_by_freq = {}
        for freq, freq_group in ue_group.groupby("cell_carrier_freq_mhz"):
            sinr_by_freq[freq] = compute_layer_sinr(freq_group)
        max_sinr = max(sinr_by_freq.values())
        ue_sinr[ue_id] = max_sinr
    df = df.copy()
    df["sinr_db"] = df["ue_id"].map(ue_sinr)
    return df

## Process Data

In [24]:
tx_power_dbm = 23
full_data = connect_ue_to_all_cells(ue_data, topology)
full_data['distance_km'] = full_data.apply(lambda row: GISTools.get_log_distance(
    row['latitude'], row['longitude'], row['cell_lat'], row['cell_lon']), axis=1)
full_data['cell_rxpower_dbm'] = full_data.apply(lambda row: calculate_received_power(
    tx_power_dbm, row['distance_km'], row['cell_carrier_freq_mhz']), axis=1)
full_data = add_sinr_column(full_data)
preproceseed_prediction_ue_data = full_data.rename(columns={'latitude': 'loc_y', 'longitude': 'loc_x'})

## Define Hyperparameters

In [25]:
hyst = 1
ttt = 5
rlf_threshold = -24.5

## Define Attachment Logic Functions

In [200]:
from typing import List

def _update_current_strongest(ue_data_for_current_tick, past_attachment, hyst):
    merged_df = pd.merge(ue_data_for_current_tick, past_attachment,
                             on='ue_id', how='left', suffixes=('', '_past'))
    final_data = []
    for ue_id, group in merged_df.groupby('ue_id'):
        best_row = None
        best_power = -999
        for _, row in group.iterrows():
            current_power = row['cell_rxpower_dbm']
            past_cell_id = row['cell_id_past']
            past_data = ue_data_for_current_tick[(ue_data_for_current_tick['ue_id'] == ue_id) &
                                                (ue_data_for_current_tick['cell_id'] == past_cell_id)]
            if not past_data.empty:
                past_power = past_data.iloc[0]['cell_rxpower_dbm']
            else:
                past_power = -999
            if current_power - past_power >= hyst:
                if current_power > best_power:
                    best_row = row
                    best_power = current_power
            else:
                if past_power > best_power:
                    best_row = past_data.iloc[0]
                    best_power = past_power
        if best_row is not None:
            final_data.append(best_row)
    final_df = pd.DataFrame(final_data)
    final_df = final_df.loc[:, ~final_df.columns.str.endswith('_past')]
    return final_df


def _update_current_attachment(strongest_server_history, ue_data_for_current_tick, past_attachment):
    current_attachment_list = [] # contains updated ue -> cell + current data
    merged_df = pd.concat(strongest_server_history, ignore_index=True)

    #individual UE scope
    for UE in ue_data_for_current_tick['ue_id'].unique():
        # Check consistency for this specific UE
        history_consistency_check = merged_df[merged_df['ue_id'] == UE]['cell_id'].nunique()
        if history_consistency_check > 0:
            consistent_cell_id = merged_df[merged_df['ue_id'] == UE]['cell_id'].unique()[0]

            # attach consistent cell or past cell decision
            if history_consistency_check == 1:
                matching_rows = ue_data_for_current_tick[(ue_data_for_current_tick['ue_id'] == UE) & (ue_data_for_current_tick['cell_id'] == consistent_cell_id)]
                if not matching_rows.empty:
                    current_attachment_list.append(matching_rows.iloc[0].copy()) # Ensure a copy
            else:
                past_cell_id_df = past_attachment[past_attachment['ue_id'] == UE]['cell_id']
                if not past_cell_id_df.empty:
                    past_cell_id = past_cell_id_df.values[0]
                    if past_cell_id == "RLF":
                        highest_power_row = ue_data_for_current_tick[ue_data_for_current_tick['ue_id'] == UE].nlargest(1, 'cell_rxpower_dbm').iloc[0].copy() # Ensure a copy
                        current_attachment_list.append(highest_power_row)
                    else:
                        matching_rows = ue_data_for_current_tick[(ue_data_for_current_tick['ue_id'] == UE) & (ue_data_for_current_tick['cell_id'] == past_cell_id)]
                        if not matching_rows.empty:
                            current_attachment_list.append(matching_rows.iloc[0].copy()) # Ensure a copy

    current_attachment = pd.DataFrame(current_attachment_list).reset_index(drop=True)

    # Ensure all UEs from current tick are present in current_attachment
    current_attachment = pd.merge(ue_data_for_current_tick[['ue_id']].drop_duplicates(), current_attachment, on='ue_id', how='left')

    return current_attachment

def _check_hyst_in_current_tick(ue_data_for_current_tick: pd.DataFrame, current_attachment: pd.DataFrame, past_attachment: pd.DataFrame, hyst: float) -> pd.DataFrame:
    if current_attachment.shape != past_attachment.shape:
        raise AssertionError('current attachment and past attachment are not consistent. Check their shape, ue_id and cell_id columns.')
    elif set(current_attachment['ue_id']) != set(past_attachment['ue_id']):
        raise AssertionError('Error 2')

    for i, curr in current_attachment.iterrows():
        prev = past_attachment[past_attachment['ue_id'] == curr['ue_id']].iloc[0]
        if curr['cell_id'] == prev['cell_id']:
            continue
        curr_attachment_rxpower = ue_data_for_current_tick[(ue_data_for_current_tick['ue_id'] == curr['ue_id']) & (ue_data_for_current_tick['cell_id'] == curr['cell_id'])]['cell_rxpower_dbm'].values[0]
        try:
            prev_attachment_rxpower = ue_data_for_current_tick[(ue_data_for_current_tick['ue_id'] == prev['ue_id']) & (ue_data_for_current_tick['cell_id'] == prev['cell_id'])]['cell_rxpower_dbm'].values[0]
        except:
            prev_attachment_rxpower = -np.inf
        if curr_attachment_rxpower < prev_attachment_rxpower + hyst:
            current_attachment.at[i, 'cell_id'] = prev['cell_id']
            current_attachment.at[i, 'cell_rxpower_dbm'] = prev_attachment_rxpower

    return current_attachment

def check_rlf_threshold(df, current_tick_df, rlf_threshold):
    updated_df = df.copy()
    for ue_id in df['ue_id']:
        df_ue = df[df['ue_id'] == ue_id]
        current_tick_ue = current_tick_df[current_tick_df['ue_id'] == ue_id]
        if not df_ue.empty and not current_tick_ue.empty:
            max_sinr = df_ue['sinr_db'].values[0]
            if max_sinr >= rlf_threshold:
                continue
            else:
                max_sinr_current_tick = current_tick_ue['sinr_db'].max()
                if max_sinr_current_tick >= rlf_threshold:
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'sinr_db'] = max_sinr_current_tick
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'cell_id'] = current_tick_ue['cell_id'].values[0]
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'cell_rxpower_dbm'] = current_tick_ue['cell_rxpower_dbm'].values[0]
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'cell_lat'] = current_tick_ue['cell_lat'].values[0]
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'cell_lon'] = current_tick_ue['cell_lon'].values[0]
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'cell_carrier_freq_mhz'] = current_tick_ue['cell_carrier_freq_mhz'].values[0]
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'cell_az_deg'] = current_tick_ue['cell_az_deg'].values[0]
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'distance_km'] = current_tick_ue['distance_km'].values[0]
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'relative_bearing'] = current_tick_ue['relative_bearing'].values[0]
                else:
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'sinr_db'] = -np.inf
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'cell_id'] = "RLF"
                    updated_df.loc[updated_df['ue_id'] == ue_id, 'cell_rxpower_dbm'] = -np.inf
    return updated_df

# def perform_attachment_hyst_ttt_per_tick(ue_data_for_current_tick, strongest_server_history, past_attachment, ttt, hyst, use_strongest_server = False):
#     current_strongest = ue_data_for_current_tick.loc[ue_data_for_current_tick.groupby('ue_id')['cell_rxpower_dbm'].idxmax()]
#     if len(strongest_server_history) >= ttt:
#         raise AssertionError("Error: Strongest_Server_History needs to be Less Than TTT!")
#     else:
#         if use_strongest_server:
#             current_attachment = current_strongest
#             strongest_server_history.append(current_strongest)

#         else:
#             if ttt == len(strongest_server_history) + 1:
#                 current_strongest = _update_current_strongest(ue_data_for_current_tick, past_attachment, hyst)
#                 strongest_server_history.append(current_strongest)
#                 current_attachment = _update_current_attachment(strongest_server_history, ue_data_for_current_tick, past_attachment)
#                 current_attachment = _check_hyst_in_current_tick(ue_data_for_current_tick, current_attachment, past_attachment, hyst)
#             else:
#                 raise AssertionError("Length of Strongest Server History must be EQUALS to TTT - 1.\n Call Perform Attachment with use_strongest_server = True")

#     if len(strongest_server_history) == ttt:
#         strongest_server_history.pop(0)

#     return strongest_server_history, current_attachment



## perform_attachment_hyst_ttt_per_tick Modified:
To accommodate/fix Error while executing RL episode, did some refactoring without changing the base logic 

Error: Strongest_Server_History needs to be Less Than TTT!



In [None]:
def perform_attachment_hyst_ttt_per_tick(ue_data_for_current_tick, strongest_server_history, past_attachment, ttt, hyst, use_strongest_server = False):
    # --- START FIX ---
    # Trim history if the new ttt requires a shorter history than currently stored.
    # Ensure history length is at most ttt-1 before adding the current tick's info.
    while len(strongest_server_history) >= ttt and ttt > 0: # Add ttt > 0 check for safety
         strongest_server_history.pop(0) # Remove the oldest entry
    # --- END FIX ---

    current_strongest = ue_data_for_current_tick.loc[ue_data_for_current_tick.groupby('ue_id')['cell_rxpower_dbm'].idxmax()]

    # --- the rest of the function's logic ---
    if use_strongest_server:
        current_attachment = current_strongest.copy()
        strongest_server_history.append(current_strongest.copy())
    else:
        # This block is entered when len(history) == ttt - 1 (due to trimming + use_strongest_server_flag logic)
        current_strongest = _update_current_strongest(ue_data_for_current_tick, past_attachment, hyst)
        strongest_server_history.append(current_strongest.copy()) # Now len(history) == ttt
        current_attachment = _update_current_attachment(strongest_server_history, ue_data_for_current_tick, past_attachment)

    if len(strongest_server_history) == ttt:
        strongest_server_history.pop(0)

    return strongest_server_history, current_attachment

In [201]:
def perform_attachment_hyst_ttt(ue_data, hyst, ttt, rlf_threshold):
    strongest_server_hystory= []
    current_attachment = pd.DataFrame()

    tick_dataframes = {}
    for tick in sorted(ue_data['tick'].unique()):
        tick_dataframes[tick] = ue_data[ue_data['tick'] == tick].copy()

    cell_attached_df = pd.DataFrame()
    for tick in range(len(tick_dataframes)):
        if ttt - 1 > len(strongest_server_hystory):
            strongest_server_history, current_attachment = perform_attachment_hyst_ttt_per_tick(tick_dataframes[tick],strongest_server_hystory,current_attachment,ttt,hyst,use_strongest_server = True)
        else:
            strongest_server_history, current_attachment = perform_attachment_hyst_ttt_per_tick(tick_dataframes[tick],strongest_server_hystory,current_attachment,ttt,hyst,use_strongest_server = False)
        current_attachment = check_rlf_threshold(current_attachment, tick_dataframes[tick], rlf_threshold)
        cell_attached_df = pd.concat([cell_attached_df,current_attachment])

    return cell_attached_df

## Execute `perform_attachment_hyst_ttt`

In [202]:
preproceseed_prediction_ue_data.head()

Unnamed: 0,ue_id,loc_x,loc_y,tick,cell_lat,cell_lon,cell_id,cell_az_deg,cell_carrier_freq_mhz,distance_km,cell_rxpower_dbm,sinr_db
0,0.0,-22.625309,59.806764,0.0,-90.0,-180.0,cell_1,0,2100,16.6295,-100.31197,-24.173384
1,0.0,-22.625309,59.806764,0.0,0.0,0.0,cell_2,120,2100,15.752768,-99.841523,-24.173384
2,0.0,-22.625309,59.806764,0.0,90.0,180.0,cell_3,240,2100,15.027772,-99.432278,-24.173384
3,1.0,119.764151,54.857584,0.0,-90.0,-180.0,cell_1,0,2100,16.595905,-100.294405,-23.848467
4,1.0,119.764151,54.857584,0.0,0.0,0.0,cell_2,120,2100,16.289273,-100.13242,-23.848467


In [203]:
final_df = perform_attachment_hyst_ttt(preproceseed_prediction_ue_data, hyst=hyst, ttt=ttt, rlf_threshold=rlf_threshold)
final_df.head()

Unnamed: 0,ue_id,loc_x,loc_y,tick,cell_lat,cell_lon,cell_id,cell_az_deg,cell_carrier_freq_mhz,distance_km,cell_rxpower_dbm,sinr_db
2,0.0,-22.625309,59.806764,0.0,90.0,180.0,cell_3,240,2100,15.027772,-99.432278,-24.173384
5,1.0,119.764151,54.857584,0.0,90.0,180.0,cell_3,240,2100,15.179563,-99.519571,-23.848467
6,2.0,72.095437,-20.253892,0.0,-90.0,-180.0,RLF,0,2100,15.865016,-inf,-inf
9,3.0,-67.548009,-38.100941,0.0,-90.0,-180.0,cell_1,0,2100,15.569455,-99.739854,-24.474944
12,4.0,59.867089,-83.10393,0.0,-90.0,-180.0,cell_1,0,2100,13.551107,-98.533881,-23.485879


## CALCULATE MRO OLD:

In [1]:
def count_rlf(df):
    """
    Counts the number of times a mock_ue_id switches from any cell_id to 'RLF'.
    """
    return (df['cell_id'] == "RLF").sum()

def count_switches(df):
    """
    Counts the number of times a ue_id switches cell_id to a different one (excluding 'RLF').
    """
    count = 0
    df = df.sort_values(by=['ue_id', 'tick'])  # Ensure correct order
    prev_cells = {}
    prev_ticks = {}

    for _, row in df.iterrows():
        ue_id, cell_id, tick = row['ue_id'], row['cell_id'], row['tick']

        if ue_id in prev_cells and prev_cells[ue_id] != cell_id and prev_cells[ue_id] is not None:
            if tick == prev_ticks[ue_id] + 1 and cell_id != "RLF":
                count += 1

        prev_cells[ue_id] = cell_id
        prev_ticks[ue_id] = tick

    return count

def calculate_mro_metric(ns_handover_count, nf_handover_count, prediction_ue_data):
    # Constants for interruption times
    ts = 50 / 1000  # Convert ms to seconds
    t_nas = 1000 / 1000  # Convert ms to seconds

    # Calculate total time (T) based on ticks; assuming each tick represents a uniform time slice
    # This could be adjusted if ticks represent variable time slices
    # Rather than passing the UE Data as whole we can send just an integar for tick
    ticks = len(prediction_ue_data['tick'].unique())
    tick_duration_seconds = 10000  # 10,000 second per tick
    T = ticks * tick_duration_seconds

    # Calculate D
    D = T - (ns_handover_count * ts + nf_handover_count * t_nas)

    return D

In [197]:
print(calculate_mro_metric(count_switches(final_df), count_rlf(final_df), final_df))

999799.45


## RL for Optimising hyst and ttt

In [None]:
import gymnasium as gym
from gymnasium import spaces # Use gymnasium spaces

import pandas as pd
import numpy as np
import math
# Import other necessary libraries like stable_baselines3, BaseCallback etc.
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from sklearn.model_selection import train_test_split

In [None]:
# Let's define the training and testing data based on ticks
unique_ticks = sorted(preproceseed_prediction_ue_data['tick'].unique())
train_ticks = unique_ticks[:80]
test_ticks = unique_ticks[80:]

train_data = preproceseed_prediction_ue_data[preproceseed_prediction_ue_data['tick'].isin(train_ticks)].copy()
test_data = preproceseed_prediction_ue_data[preproceseed_prediction_ue_data['tick'].isin(test_ticks)].copy()

train_data.head()

In [199]:
test_data.head()

Unnamed: 0,ue_id,loc_x,loc_y,tick,cell_lat,cell_lon,cell_id,cell_az_deg,cell_carrier_freq_mhz,distance_km,cell_rxpower_dbm,sinr_db
4800,0.0,-15.432484,54.840405,80.0,-90.0,-180.0,cell_1,0,2100,16.595786,-100.294343,-24.173384
4801,0.0,-15.432484,54.840405,80.0,0.0,0.0,cell_2,120,2100,15.650544,-99.784975,-24.173384
4802,0.0,-15.432484,54.840405,80.0,90.0,180.0,cell_3,240,2100,15.180052,-99.519851,-24.173384
4803,1.0,123.680712,74.603708,80.0,-90.0,-180.0,cell_1,0,2100,16.723695,-100.361031,-23.848467
4804,1.0,123.680712,74.603708,80.0,0.0,0.0,cell_2,120,2100,16.209872,-100.089977,-23.848467


## New MRO

In [220]:
def count_switches_and_pingpongs(df, pp_window=5): # pp_window in ticks
    """
    Counts total successful handovers and ping-pong handovers.
    A ping-pong is defined as A->B->A within pp_window ticks.
    """
    df = df.sort_values(by=['ue_id', 'tick'])
    ho_count = 0
    pp_count = 0
    # Store recent history: ue_id -> list of (tick, cell_id) tuples
    history = {}

    for _, row in df.iterrows():
        ue_id, cell_id, tick = row['ue_id'], row['cell_id'], row['tick']

        if ue_id not in history:
            history[ue_id] = [(tick, cell_id)]
            continue

        last_tick, last_cell = history[ue_id][-1]

        # Check for handover (excluding RLF)
        if cell_id != last_cell and cell_id != "RLF" and last_cell != "RLF":
             # Ensure handover is between consecutive ticks for counting (optional, depends on data density)
             # if tick == last_tick + 1: # Uncomment if only consecutive tick HOs count
             ho_count += 1
             # Check for Ping-Pong (A->B->A pattern)
             # Need at least 2 previous entries for A->B->A
             if len(history[ue_id]) >= 2:
                 prev_tick, prev_cell = history[ue_id][-2]
                 # Is the current cell the same as the one before the last one?
                 # Is it within the time window?
                 if cell_id == prev_cell and (tick - prev_tick) <= pp_window:
                     pp_count += 1

        # Update history for this UE
        history[ue_id].append((tick, cell_id))
        # Optional: Trim history to keep it manageable (e.g., keep last pp_window + 5 entries)
        # history[ue_id] = history[ue_id][-(pp_window + 5):]

    return ho_count, pp_count

def calculate_mro_metric_v2(handovers, pingpongs, rlfs, total_ticks, base_ho_penalty_s=0.1, pp_extra_penalty_s=0.5, rlf_penalty_s=1.0):
    """
    Calculates an MRO score penalizing HOs, Ping-Pongs (extra), and RLFs.
    Uses penalties directly instead of calculating total time T.
    Goal is typically to MAXIMIZE this score (starts high, decreases with penalties).
    Or flip signs to MINIMIZE penalties. Let's aim to maximize.

    Args:
        handovers (int): Total successful handovers.
        pingpongs (int): Number of ping-pong handovers (subset of handovers).
        rlfs (int): Number of Radio Link Failures.
        total_ticks (int): Total duration in ticks (proxy for opportunity).
        base_ho_penalty_s (float): Penalty for each normal handover (seconds equivalent).
        pp_extra_penalty_s (float): *Additional* penalty for ping-pong (seconds equivalent).
        rlf_penalty_s (float): Penalty for each RLF (seconds equivalent).

    Returns:
        float: MRO score. Higher is better.
    """
    # Start with a base score proportional to duration
    # We need a consistent baseline if not using T from original formula
    # Let's use total_ticks as a simple baseline score (or 0 if minimizing penalties)
    base_score = 0 # Let's minimize total penalty instead

    normal_handovers = handovers - pingpongs
    total_penalty = (normal_handovers * base_ho_penalty_s) + \
                    (pingpongs * (base_ho_penalty_s + pp_extra_penalty_s)) + \
                    (rlfs * rlf_penalty_s)

    # Return negative penalty (higher score is better)
    return -total_penalty

    # --- OR --- Alternative: Maximize uptime-like metric (closer to original)
    # tick_duration_s = 1.0 # Define a more realistic tick duration if needed
    # effective_T = total_ticks * tick_duration_s
    # score = effective_T - total_penalty
    # return score

## Using old MRO metric 

In [225]:
# %%
# Replace 'import gym' with:
import gymnasium as gym
from gymnasium import spaces # Use gymnasium spaces

import pandas as pd
import numpy as np
import math
# Import other necessary libraries like stable_baselines3, BaseCallback etc.
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from sklearn.model_selection import train_test_split

# (Keep your helper functions like GISTools, connect_ue_to_all_cells, etc.)
# ...
# (Keep your MRO metric functions like calculate_mro_metric, count_switches, count_rlf)
# ...

# %%
# Define the RL Environment using Gymnasium
class MRO_Env(gym.Env): # Inherit from gymnasium.Env
    metadata = {"render_modes": [], "render_fps": 4} # Optional but good practice

    def __init__(self, data, topology, reward_function, train_mode=True):
        super().__init__() # Use super().__init__()
        self.data = data
        self.topology = topology
        self.reward_function = reward_function
        self.train_mode = train_mode
        self.ticks = sorted(data['tick'].unique())
        self.current_tick_index = 0
        self.max_ticks = len(self.ticks)
        self.hyst_range = [0.0, 5.0]
        self.ttt_range = [1, 10]
        # Note: self.ttt is set dynamically in step

        # Define action space using gymnasium.spaces
        self.action_space = spaces.Box(low=np.array([self.hyst_range[0], self.ttt_range[0]]),
                                         high=np.array([self.hyst_range[1], self.ttt_range[1]]),
                                         dtype=np.float32)

        # Define observation space using gymnasium.spaces
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)

        self.strongest_server_history = []
        # Initialize past_attachment with correct columns
        self.required_cols_for_history = ['ue_id', 'cell_id', 'tick', 'cell_rxpower_dbm']
        self.past_attachment = pd.DataFrame(columns=self.required_cols_for_history)
        self.past_attachment_history = []

    # Update reset signature for Gymnasium
    def reset(self, seed=None, options=None):
        super().reset(seed=seed) # Call super().reset(seed=seed)

        self.current_tick_index = 0
        self.strongest_server_history = []
        self.past_attachment = pd.DataFrame(columns=self.required_cols_for_history) # Re-initialize
        self.past_attachment_history = []

        observation = np.array([self.current_tick_index / self.max_ticks], dtype=np.float32)
        info = {} # Standard info dict for reset

        # Gymnasium reset returns obs, info
        return observation, info

    # Ensure step returns 5 values
    def step(self, action):
        hyst, ttt_action = action[0], action[1]
        hyst = np.clip(hyst, self.hyst_range[0], self.hyst_range[1])
        # Ensure ttt is at least 1 after rounding
        ttt = max(1, int(round(np.clip(ttt_action, self.ttt_range[0], self.ttt_range[1]))))
        # self.ttt = ttt # No need to store self.ttt if only used locally in step

        # Boundary check for safety
        if self.current_tick_index >= self.max_ticks:
             print(f"Warning: Step called at or beyond max_ticks ({self.current_tick_index}/{self.max_ticks}). Returning dummy values.")
             # Return dummy values consistent with observation/action spaces and 5-tuple format
             last_observation = np.array([1.0], dtype=np.float32) # Represent end state
             return last_observation, 0, True, False, {"status": "already finished"}


        current_tick = self.ticks[self.current_tick_index]
        current_tick_data = self.data[self.data['tick'] == current_tick].copy()

        # print(f"****** Start Tick: {current_tick}, Hyst: {hyst:.2f}, TTT: {ttt}") # Optional print

        use_strongest_server_flag = len(self.strongest_server_history) < ttt - 1

        # Call the simulation function (ensure it's defined correctly above)
        self.strongest_server_history, current_attachment = perform_attachment_hyst_ttt_per_tick(
            current_tick_data, self.strongest_server_history, self.past_attachment.copy(), ttt, hyst, use_strongest_server=use_strongest_server_flag
        )

        # Update self.past_attachment safely, ensuring required columns are present
        if not current_attachment.empty:
            # Ensure tick column exists (it should, but double-check)
            if 'tick' not in current_attachment.columns:
                 current_attachment['tick'] = current_tick

            if all(col in current_attachment.columns for col in self.required_cols_for_history):
                self.past_attachment = current_attachment[self.required_cols_for_history].copy()
            # else: Keep the previous self.past_attachment if columns are missing
        # else: Keep the previous self.past_attachment if current is empty

        # --- Calculate reward only at the end ---
        reward = 0
        terminated = False # Replaces 'done' for natural termination
        truncated = False # Replaces 'done' for time limit / artificial termination

        self.current_tick_index += 1

        # Check if episode is finished
        if self.current_tick_index == self.max_ticks:
            # Set appropriate done flag: terminated=False, truncated=True is common for time limits
            terminated = False
            truncated = True # Episode ended because max_ticks reached

            if not self.past_attachment_history:
                 print("Warning: past_attachment_history is empty at the end of the episode.")
                 handovers = 0
                 rlf_count = 0
                 reward = 0 # Or some baseline/penalty
            else:
                 final_df_episode = pd.concat(self.past_attachment_history).reset_index(drop=True)
                 if not final_df_episode.empty:
                      handovers = count_switches(final_df_episode)
                      rlf_count = count_rlf(final_df_episode)
                      reward = self.reward_function(handovers, rlf_count, self.data)
                 else: # Should not happen if history was not empty, but safe check
                      handovers = 0
                      rlf_count = 0
                      reward = 0

            # Reset history only when episode truly ends (handled by SB3/Monitor usually)
            # self.past_attachment_history = [] # Let Monitor handle reset logic
        else:
            # Append the current state *before* potentially finishing
            if not self.past_attachment.empty: # Append only if valid attachment exists
                self.past_attachment_history.append(self.past_attachment.copy())

        # Prepare observation for the *next* state
        # Use current_tick_index because it reflects the state *after* the step
        observation = np.array([self.current_tick_index / self.max_ticks], dtype=np.float32)
        info = {} # Store additional info if needed

        return observation, reward, terminated, truncated, info

    def close(self):
        # Implement any necessary cleanup
        pass

# %%
# (RewardCallback definition should be here if not already defined)
from stable_baselines3.common.callbacks import BaseCallback

class RewardCallback(BaseCallback):
    """
    A simple callback that can be used to print information during training.
    Modify it to log or print specific details as needed.
    """
    def __init__(self, verbose=0):
        super(RewardCallback, self).__init__(verbose)
        self.episode_rewards = []
        self.episode_lengths = []

    def _on_step(self) -> bool:
        # Check if episode is finished (using dones flag from locals, which combines terminated/truncated)
        if self.locals["dones"][0]:
            info = self.locals["infos"][0]
            if 'episode' in info: # Check if Monitor wrapper added episode info
                print(f"Episode finished. Length={info['episode']['l']}, Reward={info['episode']['r']:.2f}")
                self.episode_rewards.append(info['episode']['r'])
                self.episode_lengths.append(info['episode']['l'])
        return True # Continue training

# %%
# Instantiate and train (ensure train_data, topology are defined)

# Assuming train_data and topology are loaded and preprocessed correctly
train_env = MRO_Env(train_data, topology, calculate_mro_metric, train_mode=True)
print("** train_env created (using gymnasium)")

# PPO should automatically wrap the env with Monitor and DummyVecEnv if it's not a VecEnv
model = PPO("MlpPolicy", train_env, verbose=1)
print("** model created")

reward_callback = RewardCallback()
print("** reward callback created")

total_timesteps = 10000 # Adjust as needed
model.learn(total_timesteps=total_timesteps, callback=reward_callback)

print("Training finished.")

## Using New MRO Metric

In [None]:
# Define the RL Environment using Gymnasium
class MRO_Env(gym.Env): # Inherit from gymnasium.Env
    metadata = {"render_modes": [], "render_fps": 4} # Optional but good practice

    def __init__(self, data, topology, reward_function, train_mode=True):
        super().__init__() # Use super().__init__()
        self.data = data
        self.topology = topology
        self.reward_function = reward_function
        self.train_mode = train_mode
        self.ticks = sorted(data['tick'].unique())
        self.current_tick_index = 0
        self.max_ticks = len(self.ticks)
        self.hyst_range = [0.0, 5.0]
        self.ttt_range = [1, 10]
        # Note: self.ttt is set dynamically in step

        # Define action space using gymnasium.spaces
        self.action_space = spaces.Box(low=np.array([self.hyst_range[0], self.ttt_range[0]]),
                                         high=np.array([self.hyst_range[1], self.ttt_range[1]]),
                                         dtype=np.float32)

        # Define observation space using gymnasium.spaces
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)

        self.strongest_server_history = []
        # Initialize past_attachment with correct columns
        self.required_cols_for_history = ['ue_id', 'cell_id', 'tick', 'cell_rxpower_dbm']
        self.past_attachment = pd.DataFrame(columns=self.required_cols_for_history)
        self.past_attachment_history = []

    # Update reset signature for Gymnasium
    def reset(self, seed=None, options=None):
        super().reset(seed=seed) # Call super().reset(seed=seed)

        self.current_tick_index = 0
        self.strongest_server_history = []
        self.past_attachment = pd.DataFrame(columns=self.required_cols_for_history) # Re-initialize
        self.past_attachment_history = []

        observation = np.array([self.current_tick_index / self.max_ticks], dtype=np.float32)
        info = {} # Standard info dict for reset

        # Gymnasium reset returns obs, info
        return observation, info

    # Ensure step returns 5 values
    def step(self, action):
        hyst, ttt_action = action[0], action[1]
        hyst = np.clip(hyst, self.hyst_range[0], self.hyst_range[1])
        # Ensure ttt is at least 1 after rounding
        ttt = max(1, int(round(np.clip(ttt_action, self.ttt_range[0], self.ttt_range[1]))))
        # self.ttt = ttt # No need to store self.ttt if only used locally in step

        # Boundary check for safety
        if self.current_tick_index >= self.max_ticks:
             print(f"Warning: Step called at or beyond max_ticks ({self.current_tick_index}/{self.max_ticks}). Returning dummy values.")
             # Return dummy values consistent with observation/action spaces and 5-tuple format
             last_observation = np.array([1.0], dtype=np.float32) # Represent end state
             return last_observation, 0, True, False, {"status": "already finished"}


        current_tick = self.ticks[self.current_tick_index]
        current_tick_data = self.data[self.data['tick'] == current_tick].copy()

        # print(f"****** Start Tick: {current_tick}, Hyst: {hyst:.2f}, TTT: {ttt}") # Optional print

        use_strongest_server_flag = len(self.strongest_server_history) < ttt - 1

        # Call the simulation function (ensure it's defined correctly above)
        self.strongest_server_history, current_attachment = perform_attachment_hyst_ttt_per_tick(
            current_tick_data, self.strongest_server_history, self.past_attachment.copy(), ttt, hyst, use_strongest_server=use_strongest_server_flag
        )

        # Update self.past_attachment safely, ensuring required columns are present
        if not current_attachment.empty:
            # Ensure tick column exists (it should, but double-check)
            if 'tick' not in current_attachment.columns:
                 current_attachment['tick'] = current_tick

            if all(col in current_attachment.columns for col in self.required_cols_for_history):
                self.past_attachment = current_attachment[self.required_cols_for_history].copy()
            # else: Keep the previous self.past_attachment if columns are missing
        # else: Keep the previous self.past_attachment if current is empty

        # --- Calculate reward only at the end ---
        reward = 0
        terminated = False # Replaces 'done' for natural termination
        truncated = False # Replaces 'done' for time limit / artificial termination

        self.current_tick_index += 1

        # Check if episode is finished
        if self.current_tick_index == self.max_ticks:
            # Set appropriate done flag: terminated=False, truncated=True is common for time limits
            terminated = False
            truncated = True # Episode ended because max_ticks reached

            if not self.past_attachment_history:
                 print("Warning: past_attachment_history is empty at the end of the episode.")
                 handovers = 0
                 rlf_count = 0
                 reward = 0 # Or some baseline/penalty
            else:
                 final_df_episode = pd.concat(self.past_attachment_history).reset_index(drop=True)
                 if not final_df_episode.empty:
                    # Use the new counting function
                    ho_count, pp_count = count_switches_and_pingpongs(final_df_episode, pp_window=5) # Adjust window as needed
                    rlf_count = count_rlf(final_df_episode)
                    total_ticks_in_episode = len(self.ticks) # Or self.max_ticks

                    # Use the new reward function
                    # *** Tune these penalty values! ***
                    reward = calculate_mro_metric_v2(ho_count, pp_count, rlf_count, total_ticks_in_episode,
                                                    base_ho_penalty_s=0.4,  # e.g., 100ms penalty per HO
                                                    pp_extra_penalty_s=1.4, # e.g., Additional 400ms for PP (total 500ms)
                                                    rlf_penalty_s=1.0)      # e.g., 1s penalty per RLF
                 else: # Should not happen if history was not empty, but safe check
                      handovers = 0
                      rlf_count = 0
                      reward = 0

            # Reset history only when episode truly ends (handled by SB3/Monitor usually)
            # self.past_attachment_history = [] # Let Monitor handle reset logic
        else:
            # Append the current state *before* potentially finishing
            if not self.past_attachment.empty: # Append only if valid attachment exists
                self.past_attachment_history.append(self.past_attachment.copy())

        # Prepare observation for the *next* state
        # Use current_tick_index because it reflects the state *after* the step
        observation = np.array([self.current_tick_index / self.max_ticks], dtype=np.float32)
        info = {} # Store additional info if needed

        return observation, reward, terminated, truncated, info

    def close(self):
        # Implement any necessary cleanup
        pass

# %%
# (RewardCallback definition should be here if not already defined)
from stable_baselines3.common.callbacks import BaseCallback

class RewardCallback(BaseCallback):
    """
    A simple callback that can be used to print information during training.
    Modify it to log or print specific details as needed.
    """
    def __init__(self, verbose=0):
        super(RewardCallback, self).__init__(verbose)
        self.episode_rewards = []
        self.episode_lengths = []

    def _on_step(self) -> bool:
        # Check if episode is finished (using dones flag from locals, which combines terminated/truncated)
        if self.locals["dones"][0]:
            info = self.locals["infos"][0]
            if 'episode' in info: # Check if Monitor wrapper added episode info
                print(f"Episode finished. Length={info['episode']['l']}, Reward={info['episode']['r']:.2f}")
                self.episode_rewards.append(info['episode']['r'])
                self.episode_lengths.append(info['episode']['l'])
        return True # Continue training

# %%
# Instantiate and train (ensure train_data, topology are defined)

# Assuming train_data and topology are loaded and preprocessed correctly
train_env = MRO_Env(train_data, topology, calculate_mro_metric, train_mode=True)
print("** train_env created (using gymnasium)")

# PPO should automatically wrap the env with Monitor and DummyVecEnv if it's not a VecEnv
model = PPO("MlpPolicy", train_env, verbose=1)
print("** model created")

reward_callback = RewardCallback()
print("** reward callback created")

total_timesteps = 10000 # Adjust as needed
model.learn(total_timesteps=total_timesteps, callback=reward_callback)

print("Training finished.")

## TEST RL:
Depending upon whether we used old MRO or New MRO, test_env accommodates the testing.

In [229]:
# ---------------------- Testing the RL Model ----------------------

# Instantiate the testing environment using the 'test_data'
test_env = MRO_Env(test_data, topology, calculate_mro_metric, train_mode=False)

# Reset the environment to get the initial observation
obs, _ = test_env.reset() # Use Gymnasium reset signature
done = False
total_reward_rl = 0
rl_hyst_values = []
rl_ttt_values = []

# Loop through the time steps of the test episode
while not done:
    # *** KEY STEP ***
    # Use the trained 'model' to predict the BEST action for the current observation
    # 'deterministic=True' means NO exploration noise, just exploit the learned policy
    action, _ = model.predict(obs, deterministic=True)

    # Take the chosen action in the test environment
    # Environment calculates next state, reward (at end), done flags based on this action
    obs, reward, terminated, truncated, info = test_env.step(action) # Use Gymnasium step signature
    done = terminated or truncated # Check if episode ended

    # Accumulate reward (remember reward is likely 0 until the very last step)
    total_reward_rl += reward

    # Store the actions chosen by the RL agent
    rl_hyst_values.append(action[0])
    rl_ttt_values.append(int(round(action[1]))) # Assuming ttt is the second action element

# Print the results for the RL agent
print(f"RL Model - Total MRO Metric on Test Data: {total_reward_rl}")
# (Optional: print hyst/ttt values)
print(f"RL Model - Hysteresis Values on Test Data: {rl_hyst_values}")
print(f"RL Model - Time-to-Trigger Values on Test Data: {rl_ttt_values}")

  gym.logger.warn(
  gym.logger.warn(


RL Model - Total MRO Metric on Test Data: -2.4000000000000004
RL Model - Hysteresis Values on Test Data: [0.004759658, 0.022449465, 0.04012832, 0.057772517, 0.07535865, 0.09286385, 0.11026597, 0.12754375, 0.144677, 0.16164668, 0.1784351, 0.19502586, 0.211404, 0.22755598, 0.24346964, 0.25913423, 0.2745404, 0.28968003, 0.30454642, 0.31913388]
RL Model - Time-to-Trigger Values on Test Data: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [231]:
import numpy as np
from scipy import stats

# Assuming rl_hyst_values and rl_ttt_values are populated after running the RL test block

if rl_hyst_values and rl_ttt_values: # Check if lists are not empty
    # --- Hysteresis Analysis ---
    avg_hyst = np.mean(rl_hyst_values)
    median_hyst = np.median(rl_hyst_values)

    # Print with high precision
    print(f"--- Hysteresis Analysis ---")
    print(f"Average Hyst: {avg_hyst:.10f}") # Increased precision
    print(f"Median Hyst: {median_hyst:.10f}") # Increased precision
    print(f"Min Hyst: {np.min(rl_hyst_values):.10f}")
    print(f"Max Hyst: {np.max(rl_hyst_values):.10f}")

    # --- TTT Analysis ---
    avg_ttt = np.mean(rl_ttt_values)
    median_ttt = np.median(rl_ttt_values)
    # Use pandas Series for a robust mode calculation
    mode_ttt_result = pd.Series(rl_ttt_values).mode()
    optimal_ttt_mode = int(mode_ttt_result[0]) if not mode_ttt_result.empty else 1 # Default to 1 if no mode

    print(f"\n--- Time-to-Trigger Analysis ---")
    print(f"Average TTT: {avg_ttt:.2f}")
    print(f"Median TTT: {median_ttt:.2f}")
    print(f"Mode TTT: {optimal_ttt_mode}")

    print("-" * 60)

    # Suggestion based on calculated values
    # Since hyst values are consistently very close to 0, median/average are similar
    suggested_hyst = median_hyst
    # TTT is consistently 1
    suggested_ttt = optimal_ttt_mode

    # Format suggested hyst to avoid scientific notation if very small
    hyst_format = ".4f" if suggested_hyst > 1e-4 else ".10f"

    print(f"Suggested fixed parameters: Hyst={suggested_hyst:{hyst_format}}, TTT={suggested_ttt}")

else:
    print("RL test results (hyst/ttt values) are empty. Cannot analyze.")

--- Hysteresis Analysis ---
Average Hyst: 0.1670195758
Median Hyst: 0.1700408906
Min Hyst: 0.0047596581
Max Hyst: 0.3191338778

--- Time-to-Trigger Analysis ---
Average TTT: 1.00
Median TTT: 1.00
Mode TTT: 1
------------------------------------------------------------
Suggested fixed parameters: Hyst=0.1700, TTT=1


## Baseline Test for new MRO

In [None]:
# ---------------------- Testing with Constant Values (Baseline) ----------------------

constant_hyst = 2.0 # Example fixed value
constant_ttt = 3    # Example fixed value

# --- Note: This part manually simulates, not using env.step fully ---
# It re-implements the simulation loop using fixed parameters

# Initialize history specific to this baseline test
constant_strongest_server_history = []
# Ensure past_attachment is initialized correctly
required_cols_for_history = ['ue_id', 'cell_id', 'tick', 'cell_rxpower_dbm'] # Make sure this matches env
constant_past_attachment = pd.DataFrame(columns=required_cols_for_history)
constant_past_attachment_history = [] # Store results for final calculation

test_ticks_baseline = sorted(test_data['tick'].unique()) # Get ticks from test_data

# Loop through ticks in the test data
for tick_idx, current_tick_constant in enumerate(test_ticks_baseline):
    current_tick_data_constant = test_data[test_data['tick'] == current_tick_constant].copy()

    # Determine if initial phase based on constant_ttt
    use_strongest_server_baseline = len(constant_strongest_server_history) < constant_ttt - 1

    # Manually call the core MRO logic function with CONSTANT hyst/ttt
    constant_strongest_server_history, current_attachment_baseline = perform_attachment_hyst_ttt_per_tick(
        current_tick_data_constant,
        constant_strongest_server_history,
        constant_past_attachment.copy(), # Pass copy
        constant_ttt,                   # Use fixed TTT
        constant_hyst,                  # Use fixed Hyst
        use_strongest_server=use_strongest_server_baseline
    )

    # Update past_attachment for the baseline simulation state
    if not current_attachment_baseline.empty:
         # Ensure tick column exists (it should, but double-check)
         if 'tick' not in current_attachment_baseline.columns:
              current_attachment_baseline['tick'] = current_tick_constant
         if all(col in current_attachment_baseline.columns for col in required_cols_for_history):
              constant_past_attachment = current_attachment_baseline[required_cols_for_history].copy()
              # Append valid attachment to history list
              constant_past_attachment_history.append(constant_past_attachment.copy())
         # else: constant_past_attachment remains unchanged if columns missing


# --- Calculate final reward for baseline ---
# Inside the baseline test block, after the loop:
if not constant_past_attachment_history:
    print("Warning: Baseline history is empty.")
    total_reward_constant = -np.inf # Or appropriate score for failure/no data
else:
    final_df_constant = pd.concat(constant_past_attachment_history).reset_index(drop=True)
    if not final_df_constant.empty:
        # *** Use the new counting function ***
        ho_count_const, pp_count_const = count_switches_and_pingpongs(final_df_constant, pp_window=5) # Use same window
        rlf_count_const = count_rlf(final_df_constant)
        total_ticks_in_episode_const = len(test_ticks_baseline) # Or len(test_data['tick'].unique())

        # *** Use the same NEW reward function as the RL Env ***
        total_reward_constant = calculate_mro_metric_v2(
            ho_count_const, pp_count_const, rlf_count_const, total_ticks_in_episode_const,
            base_ho_penalty_s=0.1,  # Use consistent penalty values
            pp_extra_penalty_s=0.4,
            rlf_penalty_s=1.0
        )
    else:
        total_reward_constant = -np.inf # Or appropriate score


print(f"Constant Values - Total MRO Metric on Test Data: {total_reward_constant}")
print(f"Constant Hysteresis: {constant_hyst}, Constant Time-to-Trigger: {constant_ttt}")

## Baseline Test for old MRO

In [None]:
# ---------------------- Testing with Constant Values (Baseline) ----------------------

constant_hyst = 2.0 # Example fixed value
constant_ttt = 3    # Example fixed value

# --- Note: This part manually simulates, not using env.step fully ---
# It re-implements the simulation loop using fixed parameters

# Initialize history specific to this baseline test
constant_strongest_server_history = []
# Ensure past_attachment is initialized correctly
required_cols_for_history = ['ue_id', 'cell_id', 'tick', 'cell_rxpower_dbm'] # Make sure this matches env
constant_past_attachment = pd.DataFrame(columns=required_cols_for_history)
constant_past_attachment_history = [] # Store results for final calculation

test_ticks_baseline = sorted(test_data['tick'].unique()) # Get ticks from test_data

# Loop through ticks in the test data
for tick_idx, current_tick_constant in enumerate(test_ticks_baseline):
    current_tick_data_constant = test_data[test_data['tick'] == current_tick_constant].copy()

    # Determine if initial phase based on constant_ttt
    use_strongest_server_baseline = len(constant_strongest_server_history) < constant_ttt - 1

    # Manually call the core MRO logic function with CONSTANT hyst/ttt
    constant_strongest_server_history, current_attachment_baseline = perform_attachment_hyst_ttt_per_tick(
        current_tick_data_constant,
        constant_strongest_server_history,
        constant_past_attachment.copy(), # Pass copy
        constant_ttt,                   # Use fixed TTT
        constant_hyst,                  # Use fixed Hyst
        use_strongest_server=use_strongest_server_baseline
    )

    # Update past_attachment for the baseline simulation state
    if not current_attachment_baseline.empty:
         # Ensure tick column exists (it should, but double-check)
         if 'tick' not in current_attachment_baseline.columns:
              current_attachment_baseline['tick'] = current_tick_constant
         if all(col in current_attachment_baseline.columns for col in required_cols_for_history):
              constant_past_attachment = current_attachment_baseline[required_cols_for_history].copy()
              # Append valid attachment to history list
              constant_past_attachment_history.append(constant_past_attachment.copy())
         # else: constant_past_attachment remains unchanged if columns missing


# --- Calculate final reward for baseline ---
total_reward_constant = 0
if not constant_past_attachment_history:
    print("Warning: Baseline history is empty.")
else:
    final_df_constant = pd.concat(constant_past_attachment_history).reset_index(drop=True)
    if not final_df_constant.empty:
        handovers_constant = count_switches(final_df_constant)
        rlf_count_constant = count_rlf(final_df_constant)
        # Use the original reward function
        total_reward_constant = calculate_mro_metric(handovers_constant, rlf_count_constant, test_data)


print(f"Constant Values - Total MRO Metric on Test Data: {total_reward_constant}")
print(f"Constant Hysteresis: {constant_hyst}, Constant Time-to-Trigger: {constant_ttt}")

## Save Model

In [224]:
# Assuming 'model' trained PPO model
model_save_path = "./mro_ppo_model2.zip" # Choose a path and filename
model.save(model_save_path)

print(f"Model saved to {model_save_path}")

Model saved to ./mro_ppo_model2.zip
