In [59]:
import pandas as pd # for data manipulation 
import numpy as np
import networkx as nx # for drawing graphs
# import matplotlib.pyplot as plt # for drawing graphs
import os, sys, glob, math
import time
from multiprocessing.pool import Pool
from itertools import repeat
from tqdm import tqdm
import numba

def rssi_to_np(rssi):
    '''
    Convert RSSI readings from strings with a watt unit (e.g. "435 pW") to floats in watts (435e-12).
    Input: rssi - 1-D array-like with a .shape attribute (NumPy array or pandas Series). Every entry is a
           string whose last two characters are the unit: " W", "mW", "uW", "nW" or "pW". The text before
           the unit is parsed with float(). Missing entries (None / NaN) are accepted.
    Output: NumPy float array of shape rssi.shape holding the values in watts; missing entries become NaN
    Raises: ValueError if an entry ends with a unit that is not listed above
    '''
    # Multiplier that brings each supported unit to watts
    unit_to_watt = {" W": 1.0, "mW": 1e-3, "uW": 1e-6, "nW": 1e-9, "pW": 1e-12}

    rssi_num = np.zeros(rssi.shape)
    for index, reading in enumerate(rssi):
        # A missing reading cannot be sliced like a string, so it is passed through as NaN
        if reading is None or (isinstance(reading, float) and math.isnan(reading)):
            rssi_num[index] = np.nan
            continue
        unit = reading[-2:]
        if unit not in unit_to_watt:
            raise ValueError("Unhandled unit prefix: {!r} (RSSI entry {!r} at position {})".format(unit, reading, index))
        rssi_num[index] = float(reading[0:-2]) * unit_to_watt[unit]
    return rssi_num

def compile_micro_sim_data_v2(file_list):
    '''
    Function to compile data from the CSV files generated by each micro-simulation
    Update: To specifically return the rx_df, tx_df, mon_df and pd_df in lists, so that specific dfs can be accessed (instead of aggregating UAV dfs)
    Input: file_list - List of simulation files belonging to a certain scenario (micro-sim). Files are picked by
           substring: '_GCS-' / '_GW-' / '_UAV-' selects the node, '-Rx' / '-Tx' / 'PacketDrop' / 'Wlan' selects the kind
    Output: rx_df_list, tx_df_list, pd_df_list, mon_df_list - each ordered as [GCS, GW, UAV files in natural name order]
            The "RSSI" column of every rx, pd and mon DF is converted to watts with rssi_to_np
            Monitor (Wlan) files are optional: GCS / GW entries are None when absent and the UAV part is empty.
            Monitor DFs get an "Addr" column: 192.168.0.1 (GCS), 192.168.0.2 (GW), 192.168.0.3 onwards (UAVs, in list order)
    Raises: FileNotFoundError if a GCS or GW Rx / Tx / PacketDrop file is missing
            Missing UAV files are only reported with a print, and the UAV part of that list is then empty
    '''
    import re  # Local import: only needed for the natural sort key below

    def _natural_key(path):
        # Compare digit runs as integers so that "UAV-10" sorts after "UAV-2" (a plain string sort puts it before)
        # re.split with a capturing group alternates text / digits, so odd positions are always the digit runs
        return [int(tok) if pos % 2 else tok for pos, tok in enumerate(re.split(r'(\d+)', path))]

    def _matching(node_tag, kind_tag):
        # Files of one kind belonging to one node, in file_list order
        return [file for file in file_list if ((node_tag in file) and (kind_tag in file))]

    def _report_missing(message):
        print(message)
        if file_list: # Guarded so that an empty file_list does not raise IndexError while reporting
            print(file_list[0])

    required_kinds = (('-Rx', 'RX'), ('-Tx', 'TX'), ('PacketDrop', 'PD'))
    missing_required = [] # Filled by _load_single_node, checked once all files have been looked at

    def _load_single_node(node_tag, label, mon_addr):
        # Read the Rx / Tx / PacketDrop / monitor DFs of a node that has exactly one file of each kind (GCS, GW)
        loaded = {}
        for kind_tag, kind_label in required_kinds:
            matches = _matching(node_tag, kind_tag)
            if len(matches) > 0:
                loaded[kind_label] = pd.read_csv(matches[0])
            else:
                message = "{} {} File Missing".format(label, kind_label)
                _report_missing(message)
                missing_required.append(message)
                loaded[kind_label] = None
        mon_matches = _matching(node_tag, 'Wlan')
        if len(mon_matches) > 0:
            mon_df = pd.read_csv(mon_matches[0]) # Mon file is optional
            mon_df["Addr"] = mon_addr
        else:
            mon_df = None
        return loaded['RX'], loaded['TX'], loaded['PD'], mon_df

    def _load_uav_files(kind_tag, kind_label):
        # Read every UAV file of one kind, ordered by UAV number
        paths = sorted(_matching('_UAV-', kind_tag), key=_natural_key)
        if len(paths) == 0:
            _report_missing("UAV {} File(s) Missing".format(kind_label))
        return [pd.read_csv(path) for path in paths]

    # Let's get the GCS dfs ===============================================================
    gcs_rx_df, gcs_tx_df, gcs_pd_df, gcs_mon_df = _load_single_node('_GCS-', 'GCS', "192.168.0.1")

    # Let's get the GW dfs ===============================================================
    gw_rx_df, gw_tx_df, gw_pd_df, gw_mon_df = _load_single_node('_GW-', 'GW', "192.168.0.2")

    # Let's get the UAVs dfs ===============================================================
    uavs_rx_df_list = _load_uav_files('-Rx', 'RX')
    uavs_tx_df_list = _load_uav_files('-Tx', 'TX')
    uavs_pd_df_list = _load_uav_files('PacketDrop', 'PD')
    uavs_mon_df_list = []
    uav_member_index = 3 # Monitor addresses of the UAVs follow GCS (.1) and GW (.2)
    for uav_mon_file in sorted(_matching('_UAV-', 'Wlan'), key=_natural_key): # UAV mon files are optional now
        uav_mon_df = pd.read_csv(uav_mon_file)
        uav_mon_df["Addr"] = "192.168.0." + str(uav_member_index)
        uavs_mon_df_list.append(uav_mon_df)
        uav_member_index += 1

    # The GCS / GW frames are mandatory: fail with a clear message instead of an unbound-variable error further down
    if missing_required:
        raise FileNotFoundError("Required micro-sim file(s) not found: " + ", ".join(missing_required))

    rx_df_list = [gcs_rx_df, gw_rx_df] + uavs_rx_df_list
    tx_df_list = [gcs_tx_df, gw_tx_df] + uavs_tx_df_list
    pd_df_list = [gcs_pd_df, gw_pd_df] + uavs_pd_df_list
    mon_df_list = [gcs_mon_df, gw_mon_df] + uavs_mon_df_list

    # Convert the RSSI strings to watts (this block can be commented out if RSSI data is not used)
    for rx_df in rx_df_list:
        rx_df["RSSI"] = rssi_to_np(rx_df["RSSI"])
    for pd_df in pd_df_list:
        pd_df["RSSI"] = rssi_to_np(pd_df["RSSI"])
    for mon_df in mon_df_list:
        if mon_df is not None:
            mon_df["RSSI"] = rssi_to_np(mon_df["RSSI"])

    return rx_df_list, tx_df_list, pd_df_list, mon_df_list

def process_dropped_packets_v5(tx_df, rx_df_list, pd_df_list, tx_index, delay_threshold, sending_interval=40, NP=10000):
    '''
    This function is to compile packet information from the tx, rx and pd dataframes, for downlink comm. (GCS to UAVs)
    Every packet in tx_df is looked up in the Rx DF of its destination and in the packet drop DFs of the
    transmitter, the gateway and the destination.
    tx_df: Tx DF (columns used: Packet_Name, Packet_Seq, Dest_Addr, Bytes)
    rx_df_list: List of Rx DFs, first one is for GCS, second for GW, subsequent DFs in the list for UAV 1, 2, ...
    pd_df_list: List of packet drop DFs, first one is for GCS, second for GW, subsequent DFs in the list for UAV 1, 2, ...
    tx_index: Index of Tx in pd_df_list (e.g. for GCS, tx_index = 0)
    delay_threshold: Delay threshold to consider if packet arrived too late
    sending_interval: Mean sending interval (used to determine UAV speed in the simulation)
    NP: Number of packets set for every 100m
    Output: pkt_df: DF containing info on packets from tx_df received and dropped, sorted by RxTime.
            reset_index() is called without drop, so the pre-sort row position is kept in an "index" column.
            Packet_State is RELIABLE / DELAY_EXCEEDED for received packets and
            RETRY_LIMIT_REACHED / QUEUE_OVERFLOW / INTERFACE_DOWN / FAILED for packets that were not received.
            Packets that were neither received nor found in any packet drop DF are left out.
    '''
    GW_INDEX = 1 # Position of the gateway UAV in rx_df_list / pd_df_list
    uav_speed = 100 * 1000 / NP / sending_interval # This is for estimating the U2G Distance when queue overflow happens (refer Omnet ini file)
    pkt_df = pd.DataFrame(columns = ['RxTime','TxTime','Packet_Name','Bytes','RSSI','U2G_SINR','U2U_SINR','U2G_BER','U2U_BER',
                                    'Hop_Count','Delay','Queueing_Time','Backoff_Time','U2G_Distance',
                                    'Incorrectly_Rcvd','Queue_Overflow','Interface_Down','Number_Dropped','Packet_State'])
    # Per-packet results are collected here and concatenated once after the loop (growing pkt_df row by row is quadratic)
    pkt_frames = [pkt_df]

    for _, row in tqdm(tx_df.iterrows(), total=len(tx_df)):
        packetName = row["Packet_Name"] + "-" + str(row["Packet_Seq"])
        rx_index = int(row["Dest_Addr"].split(".")[-1]) - 1 # Last octet of the address, minus one, is the position in the DF lists
        rx_df = rx_df_list[rx_index]

        # Drop records may sit at the transmitter (QUEUE_OVERFLOW / INTERFACE_DOWN), the gateway and the receiver.
        # dict.fromkeys removes repeated nodes (e.g. GW as transmitter or receiver) so no record is counted twice
        drop_nodes = list(dict.fromkeys([tx_index, GW_INDEX, rx_index]))
        pkt_drops = pd.concat([pd_df_list[node].loc[(pd_df_list[node]["Packet_Name"] == packetName)] for node in drop_nodes],
                              ignore_index = True)

        # Count the occurences of each failure modes for a particular packet
        if pkt_drops.empty:
            incorrect_rcvd = queue_overflow = retry_limit_excd = interface_down = num_drops = 0
        else:
            drop_reasons = pkt_drops["Packet_Drop_Reason"].values # List of pkt drop reasons at GW and Rx and Tx
            incorrect_rcvd = np.count_nonzero(drop_reasons == "INCORRECTLY_RECEIVED")
            queue_overflow = np.count_nonzero(drop_reasons == "QUEUE_OVERFLOW")
            retry_limit_excd = np.count_nonzero(drop_reasons == "RETRY_LIMIT_REACHED")
            interface_down = np.count_nonzero(drop_reasons == "INTERFACE_DOWN")
            num_drops = len(drop_reasons) # Total number of drop records for this packet, whatever the reason

        rcvd_pkt_df = rx_df.loc[(rx_df["Packet_Name"] == packetName)].copy()
        if not rcvd_pkt_df.empty:
            # Packet successfully received: attach the drop counters (all zero when received without any retries)
            rcvd_pkt_df["Incorrectly_Rcvd"] = incorrect_rcvd
            rcvd_pkt_df["Queue_Overflow"] = queue_overflow
            rcvd_pkt_df["Interface_Down"] = interface_down
            rcvd_pkt_df["Number_Dropped"] = num_drops
            # Row-wise comparison, so more than one Rx record for the same packet name does not raise
            rcvd_pkt_df["Packet_State"] = np.where(rcvd_pkt_df["Delay"] > delay_threshold, "DELAY_EXCEEDED", "RELIABLE")
            pkt_frames.append(rcvd_pkt_df)
            continue

        if pkt_drops.empty:
            # No packet drop recorded and packet not found in rx_df: nothing to report for this packet
            continue

        # If not received, add the data of failed packet.
        # Series.max() / .min() skip NaN; the builtin max() / min() give results that depend on where the NaN sits
        rx_time = pkt_drops["RxTime"].max()
        tx_time = pkt_drops["TxTime"].min()
        rssi = pkt_drops["RSSI"].mean() # This should be taking the max RSSI, but since it is not used, leaving it as mean for now
        u2g_sinr = pkt_drops["U2G_SINR"].max()
        u2g_ber = pkt_drops["U2G_BER"].max()
        delay = pkt_drops["Delay"].max()
        queueing_time = pkt_drops["Queueing_Time"].max()
        backoff_time = pkt_drops["Backoff_Time"].max()
        u2g_distance = pkt_drops["U2G_Distance"].max()

        # Packet State Based on Failure Mode
        if retry_limit_excd > 0:
            pkt_state = "RETRY_LIMIT_REACHED" # The packet failed to be received (RETRY_LIMIT_EXCEEDED)
        elif queue_overflow > 0:
            pkt_state = "QUEUE_OVERFLOW" # The packet failed due to queue buffer overflow
        elif interface_down > 0:
            pkt_state = "INTERFACE_DOWN" # The packet failed due to interface down
        else:
            pkt_state = "FAILED" # Unaccounted fail reason
            print("Packet Failure Mode Unknown")
        # For retry limit / queue overflow drops there may not be any U2G distance recorded. But knowing the speed, we can compute it
        if pkt_state in ("RETRY_LIMIT_REACHED", "QUEUE_OVERFLOW") and pd.isna(u2g_distance):
            u2g_distance = uav_speed * rx_time

        # Check for U2U Data: there may not always be a U2U communication, in which case every U2U_SINR entry is missing
        if pkt_drops["U2U_SINR"].notna().any():
            u2u_sinr = pkt_drops["U2U_SINR"].max()
            u2u_ber = pkt_drops["U2U_BER"].max()
            hop_count = 2
        else:
            u2u_sinr = None
            u2u_ber = None
            hop_count = 1

        pkt_frames.append(pd.DataFrame([{'RxTime': rx_time,'TxTime': tx_time,'Packet_Name': packetName,'Bytes': row["Bytes"],'RSSI': rssi,'U2G_SINR': u2g_sinr,'U2U_SINR': u2u_sinr,
                      'U2G_BER': u2g_ber,'U2U_BER': u2u_ber,'Hop_Count': hop_count,'Delay': delay,'Queueing_Time': queueing_time,'Backoff_Time': backoff_time,'U2G_Distance': u2g_distance,
                      'Incorrectly_Rcvd': incorrect_rcvd,'Queue_Overflow': queue_overflow,'Interface_Down': interface_down,'Number_Dropped': num_drops,'Packet_State': pkt_state}]))

    pkt_df = pd.concat(pkt_frames, ignore_index = True)
    pkt_df = pkt_df.sort_values("RxTime")
    pkt_df = pkt_df.reset_index()
    return pkt_df

def process_dropped_packets_dict(tx_df, rx_df_list, pd_df_list, tx_index, delay_threshold, sending_interval=40, NP=10000):
    '''
    This function is to compile packet information from the tx, rx and pd dataframes, for downlink comm. (GCS to UAVs)
    It iterates over tx_df as a list of record dicts (tx_df.to_dict('records')) instead of using iterrows.
    tx_df: Tx DF (columns used: Packet_Name, Packet_Seq, Dest_Addr, Bytes)
    rx_df_list: List of Rx DFs, first one is for GCS, second for GW, subsequent DFs in the list for UAV 1, 2, ...
    pd_df_list: List of packet drop DFs, first one is for GCS, second for GW, subsequent DFs in the list for UAV 1, 2, ...
    tx_index: Index of Tx in pd_df_list (e.g. for GCS, tx_index = 0)
    delay_threshold: Delay threshold to consider if packet arrived too late
    sending_interval: Mean sending interval (used to determine UAV speed in the simulation)
    NP: Number of packets set for every 100m
    Output: pkt_df: DF containing info on packets from tx_df received and dropped, sorted by RxTime.
            reset_index() is called without drop, so the pre-sort row position is kept in an "index" column.
            Packet_State is RELIABLE / DELAY_EXCEEDED for received packets and
            RETRY_LIMIT_REACHED / QUEUE_OVERFLOW / INTERFACE_DOWN / FAILED for packets that were not received.
            Packets that were neither received nor found in any packet drop DF are left out.
            An empty tx_df gives an empty pkt_df.
    '''
    GW_INDEX = 1 # Position of the gateway UAV in rx_df_list / pd_df_list
    uav_speed = 100 * 1000 / NP / sending_interval # This is for estimating the U2G Distance when queue overflow happens (refer Omnet ini file)
    pkt_df = pd.DataFrame(columns = ['RxTime','TxTime','Packet_Name','Bytes','RSSI','U2G_SINR','U2U_SINR','U2G_BER','U2U_BER',
                                    'Hop_Count','Delay','Queueing_Time','Backoff_Time','U2G_Distance',
                                    'Incorrectly_Rcvd','Queue_Overflow','Interface_Down','Number_Dropped','Packet_State'])

    def _drops_for(packet_name, rx_index):
        # Drop records of one packet at the transmitter, the gateway and the receiver.
        # Repeated nodes (e.g. GW as transmitter or receiver) are queried once so no record is counted twice
        nodes = list(dict.fromkeys([tx_index, GW_INDEX, rx_index]))
        return pd.concat([pd_df_list[node].loc[(pd_df_list[node]["Packet_Name"] == packet_name)] for node in nodes],
                         ignore_index = True)

    def _count_reasons(pkt_drops):
        # Number of drop records per failure mode, plus the total number of drop records
        if pkt_drops.empty:
            return {"INCORRECTLY_RECEIVED": 0, "QUEUE_OVERFLOW": 0, "RETRY_LIMIT_REACHED": 0, "INTERFACE_DOWN": 0}, 0
        drop_reasons = pkt_drops["Packet_Drop_Reason"].values # List of pkt drop reasons at GW and Rx and Tx
        counts = {reason: np.count_nonzero(drop_reasons == reason)
                  for reason in ("INCORRECTLY_RECEIVED", "QUEUE_OVERFLOW", "RETRY_LIMIT_REACHED", "INTERFACE_DOWN")}
        return counts, len(drop_reasons)

    # Per-packet results are collected here and concatenated once after the loop (growing pkt_df row by row is quadratic)
    pkt_frames = [pkt_df]
    for row in tqdm(tx_df.to_dict('records')):
        # The name is taken from the row itself, so tx_df may hold more than one packet type
        packetName = row["Packet_Name"] + "-" + str(row["Packet_Seq"])
        rx_index = int(row["Dest_Addr"].split(".")[-1]) - 1 # Last octet of the address, minus one, is the position in the DF lists
        rx_df = rx_df_list[rx_index]

        pkt_drops = _drops_for(packetName, rx_index)
        counts, num_drops = _count_reasons(pkt_drops)
        rcvd_pkt_df = rx_df.loc[(rx_df["Packet_Name"] == packetName)].copy()

        if not rcvd_pkt_df.empty:
            # Packet successfully received: attach the drop counters (all zero when received without any retries)
            rcvd_pkt_df["Incorrectly_Rcvd"] = counts["INCORRECTLY_RECEIVED"]
            rcvd_pkt_df["Queue_Overflow"] = counts["QUEUE_OVERFLOW"]
            rcvd_pkt_df["Interface_Down"] = counts["INTERFACE_DOWN"]
            rcvd_pkt_df["Number_Dropped"] = num_drops
            # Row-wise comparison, so more than one Rx record for the same packet name does not raise
            rcvd_pkt_df["Packet_State"] = np.where(rcvd_pkt_df["Delay"] > delay_threshold, "DELAY_EXCEEDED", "RELIABLE")
            pkt_frames.append(rcvd_pkt_df)
        elif not pkt_drops.empty:
            # If not received, add the data of failed packet.
            # Series.max() / .min() skip NaN; the builtin max() / min() give results that depend on where the NaN sits
            rx_time = pkt_drops["RxTime"].max()
            u2g_distance = pkt_drops["U2G_Distance"].max()

            # Packet State Based on Failure Mode
            if counts["RETRY_LIMIT_REACHED"] > 0:
                pkt_state = "RETRY_LIMIT_REACHED" # The packet failed to be received (RETRY_LIMIT_EXCEEDED)
            elif counts["QUEUE_OVERFLOW"] > 0:
                pkt_state = "QUEUE_OVERFLOW" # The packet failed due to queue buffer overflow
            elif counts["INTERFACE_DOWN"] > 0:
                pkt_state = "INTERFACE_DOWN" # The packet failed due to interface down
            else:
                pkt_state = "FAILED" # Unaccounted fail reason
                print("Packet Failure Mode Unknown")
            # For retry limit / queue overflow drops there may not be any U2G distance recorded. But knowing the speed, we can compute it
            if pkt_state in ("RETRY_LIMIT_REACHED", "QUEUE_OVERFLOW") and pd.isna(u2g_distance):
                u2g_distance = uav_speed * rx_time

            # Check for U2U Data: there may not always be a U2U communication, in which case every U2U_SINR entry is missing
            has_u2u = pkt_drops["U2U_SINR"].notna().any()

            pkt_frames.append(pd.DataFrame([{
                'RxTime': rx_time,
                'TxTime': pkt_drops["TxTime"].min(),
                'Packet_Name': packetName,
                'Bytes': row["Bytes"],
                'RSSI': pkt_drops["RSSI"].mean(), # This should be taking the max RSSI, but since it is not used, leaving it as mean for now
                'U2G_SINR': pkt_drops["U2G_SINR"].max(),
                'U2U_SINR': pkt_drops["U2U_SINR"].max() if has_u2u else None,
                'U2G_BER': pkt_drops["U2G_BER"].max(),
                'U2U_BER': pkt_drops["U2U_BER"].max() if has_u2u else None,
                'Hop_Count': 2 if has_u2u else 1,
                'Delay': pkt_drops["Delay"].max(),
                'Queueing_Time': pkt_drops["Queueing_Time"].max(),
                'Backoff_Time': pkt_drops["Backoff_Time"].max(),
                'U2G_Distance': u2g_distance,
                'Incorrectly_Rcvd': counts["INCORRECTLY_RECEIVED"],
                'Queue_Overflow': counts["QUEUE_OVERFLOW"],
                'Interface_Down': counts["INTERFACE_DOWN"],
                'Number_Dropped': num_drops,
                'Packet_State': pkt_state}]))
        # else: no packet drop recorded and packet not found in rx_df, nothing to report for this packet

    pkt_df = pd.concat(pkt_frames, ignore_index = True)
    pkt_df = pkt_df.sort_values("RxTime")
    pkt_df = pkt_df.reset_index()
    return pkt_df

def process_dropped_packets_v6(tx_df, rx_df, pd_df_list, delay_threshold, sending_interval=40, NP=10000):
    '''
    Date: 2/2/2023
    Update: Changed the algo to only process failed packets (found in Tx but missing from Rx). Packets found in rx_df
            only get their Packet_State set ("Delay_Exceeded" if Delay > delay_threshold, else "Reliable")
    This function is to compile packet information from the tx, rx and pd dataframes, for downlink comm. (GCS to UAVs)
    tx_df: Tx DF (columns used: Packet_Name, Packet_Seq, Dest_Addr, Bytes)
           NOTE: modified in place - Packet_Seq is converted to str and a Packet_Full_Name column is added
    rx_df: Rx DF. NOTE: a Packet_State column is written to it (in place unless the "Unnamed: 16" column had to be dropped)
    pd_df_list: List of packet drop DFs, first one is for GCS, second for GW, subsequent DFs in the list for UAV 1, 2, ...
    delay_threshold: Delay threshold to consider if packet arrived too late
    sending_interval: Mean sending interval (used to determine UAV speed in the simulation)
    NP: Number of packets set for every 100m
    Output: rx_df: Modified rx_df containing info on packets from tx_df received and dropped, sorted by RxTime.
            reset_index() is called without drop, so the pre-sort row position is kept in an "index" column.
            Failed packets get Packet_State RETRY_LIMIT_REACHED / QUEUE_OVERFLOW / INTERFACE_DOWN / FAILED.
            Failed packets without any packet drop record are left out.
    '''
    GW_INDEX = 1 # Position of the gateway UAV in pd_df_list
    uav_speed = 100 * 1000 / NP / sending_interval # This is for estimating the U2G Distance when queue overflow happens (refer Omnet ini file)
    # First, update the status of packets in rx_df
    if "Unnamed: 16" in rx_df.columns:
        rx_df = rx_df.drop(["Unnamed: 16"], axis=1)
    rx_df["Packet_State"] = np.where(rx_df['Delay'] > delay_threshold , "Delay_Exceeded", "Reliable")

    # Then, get the list of packets missing from Rx DF but transmitted in Tx DF
    packets_rcvd = rx_df["Packet_Name"].values
    tx_df["Packet_Seq"] = tx_df["Packet_Seq"].apply(str)
    tx_df["Packet_Full_Name"] = tx_df["Packet_Name"] + "-" + tx_df["Packet_Seq"]
    tx_df_failed = tx_df.loc[~(tx_df["Packet_Full_Name"].isin(packets_rcvd))]
    failed_pkt_list = [] # Using list to store failed packet instead of doing pd.concat for appending every packets. This should be much faster

    # Only iterating through packets that failed to be received
    for row in tqdm(tx_df_failed.to_dict('records')):
        packetName = row["Packet_Full_Name"]
        # Use packet name to find the src of packet (NOTE: This makes the naming of packets important)
        # Only the last dash separates the sequence number, so a dash inside the packet type does not break the parsing
        packetType = packetName.rsplit('-', 1)[0]
        if packetType == "CNCData":
            tx_index = 0
        elif packetType == "GatewayData":
            tx_index = GW_INDEX
        else:
            tx_index = int(packetType.split("_")[-1]) + 2 # If the packet is not CNCData or GatewayData, it should be UAVData
        dest_addr = row["Dest_Addr"]
        src_addr = "192.168.0." + str(tx_index+1)
        rx_index = int(dest_addr.split(".")[-1]) - 1

        # Drop records may sit at the transmitter (QUEUE_OVERFLOW / INTERFACE_DOWN), the gateway and the receiver.
        # dict.fromkeys removes repeated nodes (e.g. GW as transmitter or receiver) so no record is counted twice
        drop_nodes = list(dict.fromkeys([tx_index, GW_INDEX, rx_index]))
        pkt_drops = pd.concat([pd_df_list[node].loc[(pd_df_list[node]["Packet_Name"] == packetName)] for node in drop_nodes],
                              ignore_index = True)
        if pkt_drops.empty:
            # No packet drop recorded and packet not found in rx_df: nothing to report for this packet
            continue

        drop_reasons = pkt_drops["Packet_Drop_Reason"].values # List of pkt drop reasons at GW and Rx and Tx
        # Count the occurences of each failure modes for a particular packet
        incorrect_rcvd = np.count_nonzero(drop_reasons == "INCORRECTLY_RECEIVED")
        queue_overflow = np.count_nonzero(drop_reasons == "QUEUE_OVERFLOW")
        retry_limit_excd = np.count_nonzero(drop_reasons == "RETRY_LIMIT_REACHED")
        interface_down = np.count_nonzero(drop_reasons == "INTERFACE_DOWN")

        # Series.max() / .min() skip NaN; the builtin max() / min() give results that depend on where the NaN sits
        rx_time = pkt_drops["RxTime"].max()
        tx_time = pkt_drops["TxTime"].min()
        rssi = pkt_drops["RSSI"].max() # Highest RSSI among the drop records
        u2g_sinr = pkt_drops["U2G_SINR"].max()
        u2g_ber = pkt_drops["U2G_BER"].max()
        delay = pkt_drops["Delay"].max()
        queueing_time = pkt_drops["Queueing_Time"].max()
        backoff_time = pkt_drops["Backoff_Time"].max()
        u2g_distance = pkt_drops["U2G_Distance"].max()

        # Packet State Based on Failure Mode
        if retry_limit_excd > 0:
            pkt_state = "RETRY_LIMIT_REACHED" # The packet failed to be received (RETRY_LIMIT_EXCEEDED)
        elif queue_overflow > 0:
            pkt_state = "QUEUE_OVERFLOW" # The packet failed due to queue buffer overflow
        elif interface_down > 0:
            pkt_state = "INTERFACE_DOWN" # The packet failed due to interface down
        else:
            pkt_state = "FAILED" # Unaccounted fail reason
            print("Packet Failure Mode Unknown")
        # For retry limit / queue overflow drops there may not be any U2G distance recorded. But knowing the speed, we can compute it
        if pkt_state in ("RETRY_LIMIT_REACHED", "QUEUE_OVERFLOW") and pd.isna(u2g_distance):
            u2g_distance = uav_speed * rx_time

        # Check for U2U Data: there may not always be a U2U communication, in which case every U2U_SINR entry is missing
        if pkt_drops["U2U_SINR"].notna().any():
            u2u_sinr = pkt_drops["U2U_SINR"].max()
            u2u_ber = pkt_drops["U2U_BER"].max()
            hop_count = 2
        else:
            u2u_sinr = None
            u2u_ber = None
            hop_count = 1

        failed_pkt_list.append({'RxTime': rx_time,'TxTime': tx_time,'Packet_Name': packetName,'Bytes': row["Bytes"],'RSSI': rssi,'U2G_SINR': u2g_sinr,'U2U_SINR': u2u_sinr,
                    'U2G_BER': u2g_ber,'U2U_BER': u2u_ber,'Src_Addr': src_addr,'Dest_Addr': dest_addr,'Hop_Count': hop_count,'Delay': delay,'Queueing_Time': queueing_time,'Backoff_Time': backoff_time,
                    'U2G_Distance': u2g_distance,'Retry_Count': incorrect_rcvd,'Packet_State': pkt_state})

    failed_pkt_df = pd.DataFrame(failed_pkt_list)
    rx_df = pd.concat([rx_df,failed_pkt_df], ignore_index = True)
    rx_df = rx_df.sort_values("RxTime")
    rx_df = rx_df.reset_index()
    return rx_df

def process_throughput(df, timeDiv):
    '''
    Function to calculate throughput data for a DataFrame
    The RxTime axis is cut into windows [i*timeDiv, (i+1)*timeDiv). Every row falling in a window gets, in the
    "Throughput" column, the sum of "Bytes" of the received packets of that window divided by timeDiv.
    A packet counts as received when its Packet_State is "Reliable" or "Delay_Exceeded", compared case-insensitively
    so that the upper-case states written by process_dropped_packets_v5 are counted as well.
    df: DataFrame with RxTime, Bytes and Packet_State columns. It is modified in place and also returned
    timeDiv is the time division to use for calculating the throughput
    Output: df with the Throughput column filled in. Rows with a missing or negative RxTime keep their previous
            value (NaN if the column did not exist). An empty df is returned with an empty Throughput column.
    '''
    if "Throughput" not in df.columns:
        df["Throughput"] = np.nan

    max_time = df["RxTime"].max()
    if pd.isna(max_time):
        # Empty frame or no valid RxTime at all: there is no window to process
        return df

    received = df["Packet_State"].astype(str).str.upper().isin(["RELIABLE", "DELAY_EXCEEDED"])
    # floor(...) + 1 windows, so that a packet received exactly on a window boundary still falls inside the last window
    num_windows = math.floor(float(max_time) / timeDiv) + 1
    for i in range(num_windows):
        in_window = (df["RxTime"] >= (i*timeDiv)) & (df["RxTime"] < ((i+1)*timeDiv))
        totalBytes = df.loc[in_window & received, "Bytes"].sum()
        df.loc[in_window, "Throughput"] = totalBytes / timeDiv
    return df

def process_throughput_np(df_np, df_col_ind, timeDiv):
    '''
    Function to calculate throughput data for a DataFrame given as a NumPy array
    The RxTime axis is cut into windows [i*timeDiv, (i+1)*timeDiv). Every row falling in a window gets, in a new
    "Throughput" column, the sum of "Bytes" of the received packets of that window divided by timeDiv (0 if the
    window holds no received packet). A packet counts as received when its Packet_State is "Reliable" or
    "Delay_Exceeded", compared case-insensitively.
    df_np: 2-D array version of the DataFrame (e.g. df.to_numpy()); it is not modified
    df_col_ind: dict mapping every column name to its column position in df_np, in column order
                (e.g. dict(zip(df.columns, range(len(df.columns))))); it is not modified
    timeDiv is the time division to use for calculating the throughput
    Output: new DataFrame with the columns of df_col_ind followed by "Throughput"
    NOTE: The numba.jit decorator was removed: the body works on dicts, object arrays and pandas objects, which
          Numba cannot compile in nopython mode (the default mode of numba.jit from Numba 0.59 onwards)
    NOTE: Rows are selected per window with a boolean mask, so df_np does not have to be sorted by "RxTime"
    '''
    col_ind = dict(df_col_ind) # Work on a copy so that the caller's mapping is left untouched
    df_np = np.hstack((df_np, np.zeros((df_np.shape[0], 1)))) # Create column for throughput data (0 = nothing received)
    col_ind["Throughput"] = df_np.shape[1] - 1

    rx_time = df_np[:, col_ind["RxTime"]].astype(float)
    valid_rx_time = rx_time[~np.isnan(rx_time)]
    # floor(...) + 1 windows, so that a packet received exactly on a window boundary still falls inside the last window
    num_windows = math.floor(float(valid_rx_time.max()) / timeDiv) + 1 if valid_rx_time.size > 0 else 0
    received = np.array([str(state).upper() in ("RELIABLE", "DELAY_EXCEEDED") for state in df_np[:, col_ind["Packet_State"]]],
                        dtype=bool)

    for i in range(num_windows):
        in_window = (rx_time >= (i*timeDiv)) & (rx_time < ((i+1)*timeDiv))
        received_in_window = in_window & received
        if received_in_window.any():
            totalBytes = df_np[received_in_window, col_ind["Bytes"]].sum()
            df_np[in_window, col_ind["Throughput"]] = totalBytes / timeDiv

    df_new = pd.DataFrame(df_np, columns=list(col_ind.keys()))
    return df_new

# import cudf
# def process_throughput_cudf(df, timeDiv):
#     '''
#     Function to calculate throughput data for a DataFrame
#     timeDiv is the time division to use for calculating the throughput
#     NOTE: IF USING CUDF, CANNOT USE MULTIPROCESSING!!!
#     ALSO THIS FUNCTION IS NOT WORKING
#     '''
#     maxTime = math.ceil(float(df["RxTime"].max()))
#     cu_df = cudf.from_pandas(df)
#     for i in range(math.ceil(maxTime / timeDiv)):
#         df_in_range = cu_df.loc[(cu_df["RxTime"] >= (i*timeDiv)) & (cu_df["RxTime"] < ((i+1)*timeDiv)) & (cu_df["Packet_State"].str.extract('(Reliable)').notna().values) & (cu_df["Packet_State"].str.extract('(Delay_Exceeded)').notna().values)]
#         totalBytes = df_in_range["Bytes"].sum()
#         throughput = totalBytes / timeDiv
#         cu_df.loc[(cu_df["RxTime"] >= (i*timeDiv)) & (cu_df["RxTime"] < ((i+1)*timeDiv)), "Throughput"] = throughput
#     return cu_df.to_pandas()

def process_scenario(scenario, sim_root_path, delay_threshold, NP, save_path):
    '''
    Function to process one scenario (micro-sim) and save its downlink and uplink data to CSV files
    Downlink is computed from tx_df_list[0]; uplink is computed from every remaining index (1 onwards) and stacked into one DataFrame
    Input: scenario - Scenario name made of underscore-separated "<Key>-<Value>" fields. Fields 0, 1, 2 and 5 are read as
                      the number of members, inter-UAV distance, height and sending interval (integers)
           sim_root_path - Folder containing the CSV files of the scenario ("<scenario>_*.csv")
           delay_threshold, NP - Passed unchanged to process_dropped_packets_v5
           save_path - Folder where "<scenario>_downlink.csv" and "<scenario>_uplink.csv" are written
    Output: None. The CSV files are written and the DL / UL processing times are printed
    '''
    print(scenario)
    # Column layout of the saved CSV files. Columns returned by the processing step that are not listed here are kept after these ones
    base_columns = ['RxTime','TxTime','Packet_Name','Bytes','RSSI','U2G_SINR','U2U_SINR','U2G_BER','U2U_BER','Hop_Count','Throughput',
                    'Delay','Queueing_Time','Backoff_Time','U2G_Distance','Height','Inter_UAV_Distance','Num_Members','Sending_Interval',
                    'Incorrectly_Rcvd','Queue_Overflow','Interface_Down','Number_Dropped','Packet_State']
    scenario_files = glob.glob(os.path.join(sim_root_path, "{}_*.csv".format(scenario))) # Get list of csv files belonging to this scenario
    scenario_params = scenario.split('_')
    num_member = int(scenario_params[0].split('-')[-1])
    inter_uav_distance = int(scenario_params[1].split('-')[-1])
    height = int(scenario_params[2].split('-')[-1])
    sending_interval = int(scenario_params[5].split('-')[-1])
    rx_df_list, tx_df_list, pd_df_list, _mon_df_list = compile_micro_sim_data_v2(scenario_files)

    def add_scenario_params(data):
        # Tag every row with the parameters parsed from the scenario name
        data["Height"] = height
        data["Inter_UAV_Distance"] = inter_uav_distance
        data["Num_Members"] = num_member
        data["Sending_Interval"] = sending_interval
        return data

    def with_base_columns(data):
        # Base columns first (missing ones become NaN), then any extra columns in their original order.
        # This gives the same layout as concatenating onto an empty template DataFrame, without relying on
        # the deprecated pandas handling of empty frames in concat
        extra_columns = [col for col in data.columns if col not in base_columns]
        return data.reindex(columns=base_columns + extra_columns)

    # Process the state of each packet sent in DL
    start_dl_time = time.perf_counter()
    dl_data = process_dropped_packets_v5(tx_df_list[0], rx_df_list, pd_df_list, 0, delay_threshold, sending_interval, NP)
    if dl_data is not None:
        dl_data = process_throughput(add_scenario_params(dl_data), 1)
        dl_df = with_base_columns(dl_data).reset_index(drop=True)
        dl_df.to_csv(os.path.join(save_path,"{}_downlink.csv".format(scenario)), index=False)

    # Process the state of each packet sent in UL
    start_ul_time = time.perf_counter()
    ul_frames = [] # Collected per-sender results, concatenated once after the loop (avoids re-copying the growing DataFrame on every iteration)
    for i in range(1,len(rx_df_list)):
        ul_data = process_dropped_packets_v5(tx_df_list[i], rx_df_list, pd_df_list, i, delay_threshold, sending_interval, NP)
        if ul_data is not None:
            ul_frames.append(process_throughput(add_scenario_params(ul_data), 1))
    if ul_frames:
        ul_df = with_base_columns(pd.concat(ul_frames, ignore_index=True))
        if not ul_df.empty:
            ul_df.to_csv(os.path.join(save_path,"{}_uplink.csv".format(scenario)), index=False)

    end_time = time.perf_counter()
    print("DL Time: {}".format(start_ul_time-start_dl_time))
    print("UL Time: {}".format(end_time-start_ul_time))

def process_scenario_v2(scenario, sim_root_path, delay_threshold, NP, save_path):
    '''
    Function to process one scenario (micro-sim) and save its downlink and uplink data to CSV files
    Index 0 of the compiled tx / rx lists is used as the GCS; all remaining entries are concatenated and used as the UAVs
    Downlink: GCS Tx against UAVs Rx. Uplink: UAVs Tx against GCS Rx
    Input: scenario - Scenario name made of underscore-separated "<Key>-<Value>" fields. Fields 0, 1, 2 and 5 are read as
                      the number of members, inter-UAV distance, height and sending interval (integers)
           sim_root_path - Folder containing the CSV files of the scenario ("<scenario>_*.csv")
           delay_threshold, NP - Passed unchanged to process_dropped_packets_v6
           save_path - Folder where "<scenario>_downlink.csv" and "<scenario>_uplink.csv" are written
    Output: None. The CSV files are written and the timing of each stage is printed
    '''
    print(scenario)
    scenario_files = glob.glob(os.path.join(sim_root_path, "{}_*.csv".format(scenario))) # Get list of csv files belonging to this scenario
    scenario_params = scenario.split('_')
    num_member = int(scenario_params[0].split('-')[-1])
    inter_uav_distance = int(scenario_params[1].split('-')[-1])
    height = int(scenario_params[2].split('-')[-1])
    sending_interval = int(scenario_params[5].split('-')[-1])
    rx_df_list, tx_df_list, pd_df_list, _mon_df_list = compile_micro_sim_data_v2(scenario_files)

    # Sort out which df is which
    gcs_tx_df = tx_df_list[0]
    gcs_rx_df = rx_df_list[0]
    uavs_tx_df = pd.concat(tx_df_list[1:], ignore_index=True)
    uavs_rx_df = pd.concat(rx_df_list[1:], ignore_index=True)

    # Constant columns appended to every saved row, in the order they appear in the CSV
    scenario_columns = {
        "Height": height,
        "Inter_UAV_Distance": inter_uav_distance,
        "Num_Members": num_member,
        "Sending_Interval": sending_interval,
    }

    def finish_link(link_df, link_name):
        # Add the throughput column, tag the rows with the scenario parameters and save to "<scenario>_<link_name>.csv"
        start_throughput = time.perf_counter()
        link_df = process_throughput(link_df, 1)
        end_throughput = time.perf_counter()
        print("Process Throughput Time: {}".format(end_throughput-start_throughput))
        for column, value in scenario_columns.items():
            link_df[column] = value
        link_df.to_csv(os.path.join(save_path,"{}_{}.csv".format(scenario, link_name)), index=False)

    # Process the state of each packet sent in DL
    start_dl_time = time.perf_counter()
    dl_df = process_dropped_packets_v6(gcs_tx_df, uavs_rx_df, pd_df_list, delay_threshold, sending_interval, NP)
    if dl_df is not None:
        finish_link(dl_df, "downlink")

    # Process the state of each packet sent in UL
    start_ul_time = time.perf_counter()
    ul_df = process_dropped_packets_v6(uavs_tx_df, gcs_rx_df, pd_df_list, delay_threshold, sending_interval, NP)
    if ul_df is not None:
        finish_link(ul_df, "uplink")

    end_time = time.perf_counter()
    print("DL Time: {}".format(start_ul_time-start_dl_time))
    print("UL Time: {}".format(end_time-start_ul_time))

def process_sim_data_v2(sim_root_path, delay_threshold, save_path, NP=1000, num_workers=2):
    '''
    Function to process every scenario found in sim_root_path, in parallel
    A scenario is identified by its "<scenario>_GCS-Tx.csv" file. Each scenario is handed to process_scenario_v2,
    which writes the per-scenario downlink / uplink CSV files to save_path (nothing is aggregated or returned here)
    Input: sim_root_path - Folder containing the raw simulation CSV files
           delay_threshold - Passed unchanged to process_scenario_v2
           save_path - Folder where the processed CSV files are written
           NP - The number of packets set in the simulation for each 100m (refer to OMNeT++ ini sim file)
           num_workers - Number of worker processes in the pool
    Output: None
    '''
    gcs_tx_suffix = "_GCS-Tx.csv"
    gcs_tx_files = glob.glob(os.path.join(sim_root_path, "*" + gcs_tx_suffix))
    # Strip the folder and the suffix to get the list of "unique" scenarios. basename keeps this independent of the path separator,
    # and sorting makes the processing order reproducible (glob order is arbitrary)
    scenario_list = sorted(os.path.basename(path)[:-len(gcs_tx_suffix)] for path in gcs_tx_files)
    if not scenario_list:
        # Nothing to do, so do not spawn worker processes
        print("No '*{}' files found in {}".format(gcs_tx_suffix, sim_root_path))
        return

    # For each scenario, extract the UL and DL raw data
    with Pool(num_workers) as pool:
        pool.starmap(process_scenario_v2, zip(scenario_list, repeat(sim_root_path), repeat(delay_threshold), repeat(NP), repeat(save_path)))
    return

Preprocess data and save to CSV files

In [None]:
# Preprocess every scenario found under sim_root_path. save_path is the same folder,
# so the per-scenario downlink / uplink CSV files are written next to the raw simulation CSVs.
sim_root_path = "/home/research-student/omnetpp_sim_results/Testing"
delay_threshold = 0.04 # Forwarded unchanged to process_sim_data_v2; units are not visible here (presumably seconds, TODO confirm)
process_sim_data_v2(sim_root_path, delay_threshold=delay_threshold, save_path=sim_root_path)


Testing Stuff

In [1]:
# Testing categorical data type
# Packet_Name ("<type>-<seq>") is split into a categorical Packet_Type and an unsigned integer Packet_Seq,
# then a lookup on the two new columns is timed against a lookup on the full Packet_Name string
import pandas as pd
import time
rx_df = pd.read_csv("/home/research-student/omnetpp_sim_results/Testing/NumMember-7_InterUAVDistance-5_Height-24_Distance-0_PacketSize-24_SendingRate-233_GCS-Rx.csv")
rx_df[["Packet_Type", "Packet_Seq"]] = rx_df.Packet_Name.str.split("-",expand=True)
rx_df["Packet_Type"] = rx_df.Packet_Type.astype('category')
rx_df["Packet_Seq"] = rx_df.Packet_Seq.astype("uint32")
# print(rx_df.head())
# print(rx_df.dtypes)
# rx_df.memory_usage()

# Compare timing performance
# perf_counter is the monotonic, highest-resolution clock and is the one intended for timing short intervals like this lookup
start_t = time.perf_counter()
rx_df.loc[(rx_df["Packet_Type"] == "UAVData_3") & (rx_df["Packet_Seq"] == 1)] # Elapsed time: 0.0016155242919921875
# rx_df.loc[(rx_df["Packet_Name"] == "UAVData_3-0")] # Elapsed time: 0.024063587188720703
end_t = time.perf_counter()
print(end_t - start_t)

0.001888275146484375


In [12]:
import pandas as pd
# Load the downlink CSV of the same scenario as produced by the v1 and v2 pipelines.
# The v2 file carries two extra columns (Packet_Type, Packet_Seq), which are dropped so both frames have the same layout.
df_v1 = pd.read_csv("/home/research-student/omnetpp_sim_results/Testing_v1/NumMember-7_InterUAVDistance-5_Height-24_Distance-0_PacketSize-24_SendingRate-233_downlink.csv")
df_v2 = pd.read_csv(
    "/home/research-student/omnetpp_sim_results/Testing_v2/NumMember-7_InterUAVDistance-5_Height-24_Distance-0_PacketSize-24_SendingRate-233_downlink.csv"
).drop(columns=["Packet_Type", "Packet_Seq"])
# df_v1 == df_v2
# Print the U2G_SINR column of each version, v1 first
print(df_v1["U2G_SINR"])
print(df_v2["U2G_SINR"])

0         486.934000
1         445.416000
2        1012.180000
3         296.010000
4         664.423000
            ...     
50057       0.687825
50058            NaN
50059            NaN
50060            NaN
50061            NaN
Name: U2G_SINR, Length: 50062, dtype: float64
0         486.933990
1         445.415985
2        1012.179993
3         296.010010
4         664.422974
            ...     
50057       0.687825
50058            NaN
50059            NaN
50060            NaN
50061            NaN
Name: U2G_SINR, Length: 50062, dtype: float64


In [None]:
import pandas as pd
# Scratch check: walk a Tx CSV as a list of per-row dicts instead of as a DataFrame
tx_df = pd.read_csv("/home/research-student/omnetpp_sim_results/Testing/NumMember-7_InterUAVDistance-5_Height-24_Distance-0_PacketSize-24_SendingRate-233_GCS-Tx.csv")
tx_dict = tx_df.to_dict(orient='records') # One {column: value} dict per row
tx_time = tx_df.to_dict()["Packet_Name"].values() # Values of the Packet_Name column (despite the variable name)
print(tx_dict[0]["Packet_Name"])
# For every row: the whole record, then its Bytes and Packet_Seq fields, one per line
for row in tx_dict:
    print(row, row["Bytes"], row["Packet_Seq"], sep="\n")

In [None]:
import pandas as pd
# Scratch check: rebuild the full packet name "<Packet_Name>-<Packet_Seq>" from the two separate Tx columns
tx_df = (
    pd.read_csv("/home/research-student/omnetpp_sim_results/Modulation_NumPkts_Test/Test_Np1000_BPSK_6-5Mbps/NumMember-3_InterUAVDistance-5_Height-108_Distance-0_PacketSize-921_SendingRate-809_GCS-Tx.csv")
    # Packet_Seq has to be text before it can be joined to Packet_Name
    .assign(Packet_Seq=lambda frame: frame["Packet_Seq"].map(str))
    .assign(Packet_Full_Name=lambda frame: frame["Packet_Name"] + "-" + frame["Packet_Seq"])
)
tx_df.head()

In [None]:
import pandas as pd
import numpy as np
# Scratch check: label every received packet by comparing its Delay with a fixed 0.003 threshold,
# then split one Packet_Name ("<name>-<seq>") into its two parts
rx_df = (
    pd.read_csv("/home/research-student/omnetpp_sim_results/Modulation_NumPkts_Test/Test_Np1000_BPSK_6-5Mbps/NumMember-3_InterUAVDistance-5_Height-108_Distance-0_PacketSize-921_SendingRate-809_GCS-Rx.csv")
    .drop(columns=["Unnamed: 16"])
    # Rows whose Delay is not above the threshold (including missing delays) are labelled "Reliable"
    .assign(Packet_State=lambda frame: np.where(frame["Delay"] > 0.003, "Delay_Exceeded", "Reliable"))
)
# Fourth row (position 3) is used as the sample packet
packet_name, packet_seq = rx_df["Packet_Name"].iloc[3].split("-")
print(packet_name, packet_seq)

In [None]:
import pandas as pd
import numpy as np
# Scratch check: filter rows of the Rx data as a plain numpy array, using a column-name -> column-position lookup
rx_df = pd.read_csv("/home/research-student/omnetpp_sim_results/Modulation_NumPkts_Test/Test_Np1000_BPSK_6-5Mbps/NumMember-3_InterUAVDistance-5_Height-108_Distance-0_PacketSize-921_SendingRate-809_GCS-Rx.csv")
rx_np = rx_df.to_numpy()
Column_Index_Dictionary = {column: position for position, column in enumerate(rx_df.columns)}
print(rx_np)
print(Column_Index_Dictionary)
# Boolean row mask: True where the U2G_Distance column is above 1000
mask = rx_np[:, Column_Index_Dictionary["U2G_Distance"]] > 1000
print(rx_np[mask])