In [1]:
import pandas as pd # for data manipulation 
import numpy as np
import networkx as nx # for drawing graphs
import matplotlib.pyplot as plt # for drawing graphs
import os, sys, glob, math
# for creating Bayesian Belief Networks (BBN)
from pybbn.graph.dag import Bbn
from pybbn.graph.edge import Edge, EdgeType
from pybbn.graph.jointree import EvidenceBuilder
from pybbn.graph.node import BbnNode
from pybbn.graph.variable import Variable
from pybbn.pptc.inferencecontroller import InferenceController

def rssi_to_np(rssi):
    '''
    Convert RSSI readings given as strings with a unit suffix (e.g. "435 pW") to floats in watts (435e-12)
    Input: rssi - array-like of strings exposing a .shape attribute; the last two characters of each entry are the unit (" W", "mW", "uW", "nW" or "pW")
    Output: numpy array with the shape of the input holding the converted values
    Raises ValueError (after printing the offending suffix) when the unit is not one of the above
    '''
    # Multiplier applied to the numeric part for every supported two-character suffix
    scale_by_suffix = {
        " W": 1.0,
        "mW": 1e-3,
        "uW": 1e-6,
        "nW": 1e-9,
        "pW": 1e-12,
    }
    watts = np.zeros(rssi.shape)
    for position, reading in enumerate(rssi):
        suffix = reading[-2:]
        if suffix not in scale_by_suffix:
            print(suffix)
            raise ValueError("Unhandled unit prefix")
        # float() tolerates the blank left between the number and a two-letter unit
        watts[position] = float(reading[0:-2]) * scale_by_suffix[suffix]
    return watts

def _concat_frames_or_empty(frames):
    # Concatenate a list of DataFrames; an empty list gives an empty DataFrame instead of raising
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index = True)


def compile_micro_sim_data(file_list):
    '''
    Function to compile data from the CSV files generated by each micro-simulation
    Input: file_list - List of simulation files belonging to a certain scenario (micro-sim)
    Output: (uavs_rx_df, gcs_rx_df, gcs_tx_df, uavs_tx_df, gcs_mon_df, uavs_mon_df, gcs_pd_df, uavs_pd_df)
            The uavs_* frames are the concatenation of the gateway (GW) file and every UAV file of that kind.
            Any frame whose file(s) are missing or unreadable is returned as an empty DataFrame.
    Files are classified by substrings of their path: '_GCS-' / '_GW-' / '_UAV-' for the node and
    '-Rx' / '-Tx' / 'Wlan' / 'PacketDrop' for the kind of capture. Unrecognised files are ignored.
    '''
    gcs_rx_df = pd.DataFrame()
    gcs_tx_df = pd.DataFrame()
    gcs_mon_df = pd.DataFrame()
    gcs_pd_df = pd.DataFrame()

    uavs_rx_df_list = [] # List to store all df for UAVs Rx app
    uavs_tx_df_list = [] # List to store all df for UAVs Tx app
    uavs_mon_df_list = [] # List to store all df for UAVs monitor mode captures
    uavs_pd_df_list = [] # List to store all df for UAVs packet drop captures

    for file in file_list:
        # Best effort: a file that cannot be read is reported and skipped
        try:
            if ('_GCS-' in file) and ('-Tx' in file):
                # DOWNLINK: GCS Tx file, recording the packets sent from GCS
                gcs_tx_df = pd.read_csv(file)
            elif ('_GW-' in file) and ('-Rx' in file):
                # DOWNLINK: gateway Rx file, packets received from GCS
                uavs_rx_df_list.append(pd.read_csv(file))
            elif ('_UAV-' in file) and ('-Rx' in file):
                # DOWNLINK: UAV Rx file, appended for concatenation later
                uavs_rx_df_list.append(pd.read_csv(file))
            elif ('_GCS-' in file) and ('-Rx' in file):
                # UPLINK: GCS Rx file, recording packets received from UAVs
                gcs_rx_df = pd.read_csv(file)
            elif ('_GW-' in file) and ('-Tx' in file):
                # UPLINK: gateway Tx file, recording packet transmissions to GCS from gateway
                uavs_tx_df_list.append(pd.read_csv(file))
            elif ('_UAV-' in file) and ('-Tx' in file):
                # UPLINK: UAV Tx file, appended for concatenation later
                uavs_tx_df_list.append(pd.read_csv(file))
            elif ('_GCS-' in file) and ('Wlan' in file):
                # Monitor mode file for GCS
                gcs_mon_df = pd.read_csv(file)
                gcs_mon_df["Addr"] = "192.168.0.1"
            elif ('_GW-' in file) and ('Wlan' in file):
                # Monitor mode file for gateway
                gw_mon_df = pd.read_csv(file)
                gw_mon_df["Addr"] = "192.168.0.2"
                uavs_mon_df_list.append(gw_mon_df)
            elif ('_UAV-' in file) and ('Wlan' in file):
                # Monitor mode file for a UAV; the address is derived from the UAV number in the file name
                uav_mon_df = pd.read_csv(file)
                uav_index = file.split("_")[-1].split("-")[1]
                uav_mon_df["Addr"] = "192.168.0.{}".format(int(uav_index) + 3)
                uavs_mon_df_list.append(uav_mon_df)
            elif ('_GCS-' in file) and ('PacketDrop' in file):
                # Packet Drop file for GCS
                gcs_pd_df = pd.read_csv(file)
            elif ('_GW-' in file) and ('PacketDrop' in file):
                # Packet Drop file for gateway
                uavs_pd_df_list.append(pd.read_csv(file))
            elif ('_UAV-' in file) and ('PacketDrop' in file):
                # Packet Drop file for a UAV
                uavs_pd_df_list.append(pd.read_csv(file))
            else:
                # This file type is not handled, pass
                pass
        except Exception as e:
            print(file)
            print(e)

    if not (uavs_rx_df_list and uavs_tx_df_list and uavs_mon_df_list and uavs_pd_df_list):
        print("Check if any of the following files are missing")
        print(file_list)

    # Each kind is concatenated on its own so that one missing kind (e.g. no monitor
    # mode captures) does not discard the frames of the kinds that come after it
    uavs_rx_df = _concat_frames_or_empty(uavs_rx_df_list)
    uavs_tx_df = _concat_frames_or_empty(uavs_tx_df_list)
    uavs_mon_df = _concat_frames_or_empty(uavs_mon_df_list)
    uavs_pd_df = _concat_frames_or_empty(uavs_pd_df_list)

    if len(gcs_rx_df.columns) == 0 or len(gcs_tx_df.columns) == 0 or len(gcs_mon_df.columns) == 0 or len(gcs_pd_df.columns) == 0:
        print("A GCS file is missing")
        print(file_list)

    return uavs_rx_df, gcs_rx_df, gcs_tx_df, uavs_tx_df, gcs_mon_df, uavs_mon_df, gcs_pd_df, uavs_pd_df

def _uav_index_from_path(path):
    # UAV number parsed from a "<scenario>_UAV-<n>-<kind>.csv" style name, or None if the name does not fit
    try:
        return int(path.split("_")[-1].split("-")[1])
    except (IndexError, ValueError):
        return None


def _uav_file_sort_key(path):
    # Sort UAV files by numeric UAV index (so UAV-2 precedes UAV-10); unparsable names go last, by name
    uav_index = _uav_index_from_path(path)
    if uav_index is None:
        return (1, 0, path)
    return (0, uav_index, path)


def compile_micro_sim_data_v2(file_list):
    '''
    Function to compile data from the CSV files generated by each micro-simulation
    Update: To specifically return the rx_df, tx_df, mon_df and pd_df in lists, so that specific dfs can be accessed (instead of aggregating UAV dfs)
    Input: file_list - List of simulation files belonging to a certain scenario (micro-sim)
    Output: rx_df_list, tx_df_list, pd_df_list, mon_df_list
            Each list is ordered [GCS, GW, UAV files in ascending numeric UAV index].
            A GCS / GW entry is None when its file is missing (a message is printed for Rx / Tx / PacketDrop;
            monitor mode files are optional and produce no message).
    '''
    def matching(node_tag, kind_tag):
        return [file for file in file_list if ((node_tag in file) and (kind_tag in file))]

    def report_missing(message):
        print(message)
        if file_list: # Nothing to point at when the list itself is empty
            print(file_list[0])

    def read_node(node_tag, node_label, addr):
        # Read the single Rx / Tx / PacketDrop / monitor file of one node (GCS or GW)
        frames = []
        for kind_tag, kind_label in (("-Rx", "RX"), ("-Tx", "TX"), ("PacketDrop", "PD")):
            files = matching(node_tag, kind_tag)
            if files:
                frames.append(pd.read_csv(files[0]))
            else:
                report_missing("{} {} File Missing".format(node_label, kind_label))
                frames.append(None)
        mon_files = matching(node_tag, "Wlan")
        if mon_files:
            mon_df = pd.read_csv(mon_files[0]) # Mon file is optional
            mon_df["Addr"] = addr
        else:
            mon_df = None
        return frames[0], frames[1], frames[2], mon_df

    def read_uav_frames(kind_tag, kind_label):
        # Read every UAV file of one kind, in numeric UAV order
        files = sorted(matching("_UAV-", kind_tag), key=_uav_file_sort_key)
        if not files:
            report_missing("UAV {} File(s) Missing".format(kind_label))
        return [pd.read_csv(file) for file in files]

    # Let's get the GCS dfs ===============================================================
    gcs_rx_df, gcs_tx_df, gcs_pd_df, gcs_mon_df = read_node("_GCS-", "GCS", "192.168.0.1")

    # Let's get the GW dfs ===============================================================
    gw_rx_df, gw_tx_df, gw_pd_df, gw_mon_df = read_node("_GW-", "GW", "192.168.0.2")

    # Let's get the UAVs dfs ===============================================================
    uavs_rx_df_list = read_uav_frames("-Rx", "RX")
    uavs_tx_df_list = read_uav_frames("-Tx", "TX")
    uavs_pd_df_list = read_uav_frames("PacketDrop", "PD")

    uavs_mon_df_list = [] # UAV mon files are optional now
    uav_mon_files = sorted(matching("_UAV-", "Wlan"), key=_uav_file_sort_key)
    for position, uav_mon_file in enumerate(uav_mon_files):
        uav_mon_df = pd.read_csv(uav_mon_file)
        # Address follows the UAV number in the file name (UAV n -> 192.168.0.(n+3));
        # fall back to the position in the sorted list when the name cannot be parsed
        uav_index = _uav_index_from_path(uav_mon_file)
        member = position if uav_index is None else uav_index
        uav_mon_df["Addr"] = "192.168.0." + str(member + 3)
        uavs_mon_df_list.append(uav_mon_df)

    rx_df_list = [gcs_rx_df, gw_rx_df] + uavs_rx_df_list
    tx_df_list = [gcs_tx_df, gw_tx_df] + uavs_tx_df_list
    pd_df_list = [gcs_pd_df, gw_pd_df] + uavs_pd_df_list
    mon_df_list = [gcs_mon_df, gw_mon_df] + uavs_mon_df_list

    # UNCOMMENT BELOW IF RSSI DATA WILL BE USED
    # for rx_df in rx_df_list:
    #     rx_df["RSSI"] = rssi_to_np(rx_df["RSSI"])
    # for tx_df in tx_df_list:
    #     tx_df["RSSI"] = rssi_to_np(tx_df["RSSI"])
    # for pd_df in pd_df_list:
    #     pd_df["RSSI"] = rssi_to_np(pd_df["RSSI"])
    # for mon_df in mon_df_list:
    #     mon_df["RSSI"] = rssi_to_np(mon_df["RSSI"])

    return rx_df_list, tx_df_list, pd_df_list, mon_df_list

def process_dropped_packets_v1(tx_df, rx_df, mon_df, pd_df):
    '''
    This function is to fill in missing data in rx_df with data from mon_df for dropped packets that are not recorded in rx_df
    tx_df contains the list of all transmitted network packets (UL/DL)
    rx_df should only contain the captures of packets received successfully (regardless of delay)
    mon_df contains the monitor mode captures recorded on the Rx side, and contains information of packets not received successfully
    pd_df contains the packet drop recorded with reason, recorded on Rx side
    DON'T MIX UL AND DL DATA TOGETHER IN THIS FUNCTION, EVALUATE THEM SEPARATELY.

    VERSION 1: Cross-check packets dropped by comparing Tx df and Rx df,  then get packet drop reason from pd_df and match with packets in mon_df

    Output: a new DataFrame (the rx_df passed in is not modified) holding the rx_df rows with Packet_Drop_Reason "None"
            plus the matching mon_df rows with Packet_Drop_Reason "INCORRECTLY_RECEIVED", sorted by RxTime.
            The previous index is kept as an "index" column by reset_index().
    '''
    # Work on a copy so the caller's frame is left untouched
    rx_df = rx_df.copy()
    # Firstly, let's mark all the rows in rx_df as having been received correctly
    rx_df["Packet_Drop_Reason"] = "None"

    # Names already accounted for (received, or recovered from the monitor captures below)
    seen_names = set(rx_df["Packet_Name"].values)
    # Only packets dropped due to errors are handled. TODO: Include other packet drop reasons
    incorrectly_rcvd = pd_df.loc[pd_df["Packet_Drop_Reason"] == "INCORRECTLY_RECEIVED"]

    recovered = []
    for index, row in tx_df.iterrows():
        packetName = row["Packet_Name"] + "-" + str(row["Packet_Seq"])
        if packetName in seen_names:
            continue

        # Use the Rx time of each drop to find the corresponding captures in mon_df
        rx_time_dropped = incorrectly_rcvd.loc[incorrectly_rcvd["Packet_Name"] == packetName, "RxTime"].values
        err_pks_mon = (
            mon_df.loc[(mon_df["Packet_Name"] == packetName) & (mon_df["RxTime"].isin(rx_time_dropped))]
            # rename() returns a new frame, so the result has to be kept for the rename to take effect
            .rename(columns={"PkCreationTime": "TxTime"})
            .assign(**{
                "Src_Addr": "-", # TODO
                "Src_Port": "-", # TODO
                "Dest_Addr": row["Dest_Addr"],
                "Dest_Port": row["Dest_Port"],
                "Packet_Drop_Reason": "INCORRECTLY_RECEIVED",
            })
        )
        if not err_pks_mon.empty:
            recovered.append(err_pks_mon)
            seen_names.add(packetName)
        elif not recovered:
            # Keep one empty frame so the result carries the monitor columns even when nothing is recovered
            recovered.append(err_pks_mon)

    # Single concatenation instead of growing rx_df inside the loop
    rx_df = pd.concat([rx_df] + recovered, ignore_index = True)

    rx_df = rx_df.sort_values("RxTime")
    rx_df = rx_df.reset_index()
    return rx_df

def process_dropped_packets_v2(rx_df, mon_df, pd_df):
    '''
    This function is to fill in missing data in rx_df with data from mon_df for dropped packets that are not recorded in rx_df
    rx_df should only contain the captures of packets received successfully (regardless of delay)
    mon_df contains the monitor mode captures recorded on the Rx side, and contains information of packets not received successfully
    pd_df contains the packet drop recorded with reason, recorded on Rx side
    DON'T MIX UL AND DL DATA TOGETHER IN THIS FUNCTION, EVALUATE THEM SEPARATELY.

    VERSION 2: Just match the packets in pd_df with the ones in mon_df, using the RxTime as the common key.
    NOTE: the original author flagged the "RxTime is a unique key" assumption as wrong; when several drops share
    an RxTime the reason of the first one (after sorting by RxTime) is used.

    Output: a new DataFrame (the rx_df passed in is not modified) with the rx_df rows (Packet_Drop_Reason "None")
            plus the matching mon_df rows carrying the drop reason from pd_df, sorted by RxTime.
            The previous index is kept as an "index" column by reset_index().
    '''
    # Work on a copy so the caller's frame is left untouched
    rx_df = rx_df.copy()
    # Firstly, let's mark all the rows in rx_df as having been received correctly
    rx_df["Packet_Drop_Reason"] = "None"

    # Get packets that Rx received from Tx but was dropped (due to incorrectly received / hop limit etc, not due to retry limit / ARP / routing)
    packet_rcvd_dropped = pd_df.loc[(pd_df["Packet_Drop_Reason"].isin(["INCORRECTLY_RECEIVED"]))] # Add to the list the packet drop reasons to include
    packet_rcvd_dropped = packet_rcvd_dropped.sort_values("RxTime")

    # Lookup RxTime -> drop reason. The reason must be matched on RxTime: pd_df and mon_df have
    # unrelated row indexes, so assigning one column to the other would align on the wrong rows
    reason_by_rx_time = packet_rcvd_dropped.drop_duplicates("RxTime").set_index("RxTime")["Packet_Drop_Reason"]

    pks_dropped_mon = (
        mon_df.loc[mon_df["RxTime"].isin(reason_by_rx_time.index)]
        .sort_values("RxTime")
        # rename() returns a new frame, so the result has to be kept for the rename to take effect
        .rename(columns={"PkCreationTime": "TxTime"})
        .assign(**{"Src_Addr": "-", "Src_Port": "-", "Dest_Addr": "-", "Dest_Port": "-"})
    )
    pks_dropped_mon["Packet_Drop_Reason"] = pks_dropped_mon["RxTime"].map(reason_by_rx_time)
    rx_df = pd.concat([rx_df,pks_dropped_mon], ignore_index = True)

    rx_df = rx_df.sort_values("RxTime")
    rx_df = rx_df.reset_index()
    return rx_df

def process_dropped_packets_v3(rx_df, pd_df):
    '''
    This function adds to rx_df the dropped packets recorded in pd_df, which are not recorded in rx_df
    rx_df should only contain the captures of packets received successfully (regardless of delay)
    pd_df contains the packet drop recorded with reason, recorded on Rx side
    DON'T MIX UL AND DL DATA TOGETHER IN THIS FUNCTION, EVALUATE THEM SEPARATELY.

    VERSION 3: Takes the packets from pd_df that was INCORRECTLY_RECEIVED, and puts them in rx_df

    Side effect: a Packet_Drop_Reason column set to "None" is written into the rx_df passed in
    Output: rx_df rows plus the selected pd_df rows, sorted by RxTime; reset_index() keeps the previous index as an "index" column
    '''
    # Every row captured in rx_df counts as received correctly
    rx_df["Packet_Drop_Reason"] = "None"

    # Drop reasons to carry over (received from Tx but dropped, not retry limit / ARP / routing); extend the list to include more
    reasons_to_include = ["INCORRECTLY_RECEIVED"]
    is_included = pd_df["Packet_Drop_Reason"].isin(reasons_to_include)

    # Fields that the packet drop capture cannot provide are filled with a placeholder
    placeholder_columns = ("Src_Addr", "Src_Port", "Dest_Addr", "Dest_Port", "Hop_Count")
    dropped_rows = pd_df.loc[is_included].assign(**{column: "-" for column in placeholder_columns})

    combined = pd.concat([rx_df, dropped_rows], ignore_index = True)
    return combined.sort_values("RxTime").reset_index()

def process_dropped_packets_v4(tx_df, rx_df, pd_df):
    '''
    This function is to fill in missing data in rx_df with data from pd_df for dropped packets that are not recorded in rx_df
    tx_df contains the list of all transmitted network packets (UL/DL)
    rx_df should only contain the captures of packets received successfully (regardless of delay)
    pd_df contains the packet drop recorded with reason, recorded on Rx side
    DON'T MIX UL AND DL DATA TOGETHER IN THIS FUNCTION, EVALUATE THEM SEPARATELY.

    VERSION 4: For each packet transmitted (in tx_df), get the number of tries and the packet drop reason(s) from pd_df. Update the data in rx_df.

    Output: a new DataFrame (the rx_df passed in is not modified) without the Src_Addr / Src_Port / Dest_Addr / Dest_Port columns, where
            - every received packet carries its own per-reason drop counts, Number_Dropped and Packet_State "RECEIVED"
            - every transmitted packet that was never received but has drops recorded is added as one row with Packet_State "FAILED"
            Rows are sorted by RxTime; reset_index() keeps the previous index as an "index" column.
    Raises KeyError if rx_df lacks any of the four address / port columns.
    '''
    # First, let's drop the columns src_addr, src_port, dest_addr, dest_port (drop() returns a new frame)
    rx_df = rx_df.drop(columns=["Src_Addr", "Src_Port", "Dest_Addr", "Dest_Port"])

    # Output column -> packet drop reason counted in it
    reason_columns = (
        ("Incorrectly_Rcvd", "INCORRECTLY_RECEIVED"),
        ("ARP_Fail", "ADDRESS_RESOLUTION_FAILED"), # TODO: Check whether this is stored on tx_pd_df or rx_pd_df
        ("Queue_Overflow", "QUEUE_OVERFLOW"),
        ("Hop_Limit_Reached", "HOP_LIMIT_REACHED"),
        ("Interface_Down", "INTERFACE_DOWN"),
        ("Other_Dropped", "OTHER_PACKET_DROP"),
    )

    received_names = set(rx_df["Packet_Name"].values)
    failed_names = set()
    failed_rows = []

    for index, row in tx_df.iterrows():
        packetName = row["Packet_Name"] + "-" + str(row["Packet_Seq"])

        # For each packet in tx_df, count the occurrences of each failure mode
        pkt_drops = pd_df.loc[(pd_df["Packet_Name"] == packetName)]
        drop_reasons = pkt_drops["Packet_Drop_Reason"]
        stats = {column: int((drop_reasons == reason).sum()) for column, reason in reason_columns}
        stats["Number_Dropped"] = len(drop_reasons)

        if packetName in received_names:
            # Packet received (all counts are 0 when there were no retries).
            # Only the rows of THIS packet are updated, not the whole column
            is_this_packet = rx_df["Packet_Name"] == packetName
            for column, value in stats.items():
                rx_df.loc[is_this_packet, column] = value
            rx_df.loc[is_this_packet, "Packet_State"] = "RECEIVED"

        elif pkt_drops.empty:
            print("No packet drop recorded and packet not found in rx_df for packet: {}. This should not happen".format(packetName))

        elif packetName not in failed_names:
            # If not received, add the data of failed packet (once per packet name)
            failed_names.add(packetName)
            # There may not always be a U2U communication: treat the packet as
            # two-hop only when at least one drop carries a U2U measurement
            if pkt_drops["U2U_SINR"].notna().any():
                u2u_sinr = pkt_drops["U2U_SINR"].mean()
                u2u_ber = pkt_drops["U2U_BER"].mean()
                hop_count = 2
            else:
                u2u_sinr = None
                u2u_ber = None
                hop_count = 1
            failed_pkt = {
                'RxTime': pkt_drops["RxTime"].max(),
                'TxTime': pkt_drops["TxTime"].min(),
                'Packet_Name': packetName,
                'Bytes': row["Bytes"],
                'RSSI': pkt_drops["RSSI"].mean(),
                'SINR': pkt_drops["SINR"].mean(),
                'U2G_SINR': pkt_drops["U2G_SINR"].mean(),
                'U2U_SINR': u2u_sinr,
                'BER': pkt_drops["BER"].mean(),
                'U2G_BER': pkt_drops["U2G_BER"].mean(),
                'U2U_BER': u2u_ber,
                'Hop_Count': hop_count,
                'Delay': pkt_drops["Delay"].max(),
                'Queueing_Time': pkt_drops["Queueing_Time"].max(),
                'Backoff_Time': pkt_drops["Backoff_Time"].max(),
                'Distance': pkt_drops["Distance"].max(),
                'U2G_Distance': pkt_drops["U2G_Distance"].max(),
            }
            failed_pkt.update(stats)
            failed_pkt['Packet_State'] = "FAILED" # The packet failed to be received (RETRY_LIMIT_EXCEEDED)
            failed_rows.append(failed_pkt)

    # Single concatenation instead of growing rx_df inside the loop
    if failed_rows:
        rx_df = pd.concat([rx_df, pd.DataFrame(failed_rows)], ignore_index = True)

    rx_df = rx_df.sort_values("RxTime")
    rx_df = rx_df.reset_index()
    return rx_df

def process_dropped_packets_DL(tx_df, rx_df_list, pd_df_list, tx_index):
    '''
    This function is to compile packet information from the tx, rx and pd dataframes, for downlink comm. (GCS to UAVs)
    tx_df: Tx DF 
    rx_df_list: List of Rx DFs, first one is for GCS, second for GW, subsequent DFs in the list for UAV 1, 2, ...
    pd_df_list: List of packet drop DFs, first one is for GCS, second for GW, subsequent DFs in the list for UAV 1, 2, ...
    tx_index: Index of Tx in pd_df_list (e.g. for GCS, tx_index = 0)
    Output: pkt_df: DF with one row per packet from tx_df that was dropped at the GW / receiver and never received (Packet_State "FAILED")

    The receiver of a packet is located in the lists from the last octet of its Dest_Addr (x.x.x.N -> list index N-1).
    Received packets are not compiled yet (see the TODO at the end of the loop).
    '''
    pkt_columns = ['RxTime','TxTime','Packet_Name','Bytes','RSSI','U2G_SINR','U2U_SINR','U2G_BER','U2U_BER',
                   'Hop_Count','Delay','Queueing_Time','Backoff_Time','U2G_Distance',
                   'Incorrectly_Rcvd','Queue_Overflow','Interface_Down','Number_Dropped','Packet_State']

    def drops_at(node_index, packet_name):
        # Packet drops recorded at one node for one packet (mask built from that node's own frame)
        node_pd_df = pd_df_list[node_index]
        return node_pd_df.loc[(node_pd_df["Packet_Name"] == packet_name)]

    received_names = {} # rx_index -> set of packet names received by that node (built on first use)
    failed_rows = []

    for index, row in tx_df.iterrows():
        packetName = row["Packet_Name"] + "-" + str(row["Packet_Seq"])
        dest_addr = row["Dest_Addr"]
        rx_index = int(dest_addr.split(".")[-1]) - 1

        pkt_drops_tx = drops_at(tx_index, packetName) # Packets dropped at the transmitter, to catch QUEUE_OVERFLOW and INTERFACE_DOWN
        pkt_drops_gw = drops_at(1, packetName) # Packets dropped at the gateway UAV
        if rx_index != 1: # If not the GW, include packet drops at receiver. Else no need, cos GW is Rx
            pkt_drops_rx = drops_at(rx_index, packetName) # Packets dropped at the receiver (GCS / UAV)
            drop_parts = [part for part in (pkt_drops_gw, pkt_drops_rx) if not part.empty]
        else:
            drop_parts = [part for part in (pkt_drops_gw,) if not part.empty]

        if not drop_parts:
            # No packet drop at GW / Rx for this packet
            continue
        pkt_drops = pd.concat(drop_parts, ignore_index = True)

        if rx_index not in received_names:
            received_names[rx_index] = set(rx_df_list[rx_index]["Packet_Name"].values)
        if packetName in received_names[rx_index]:
            # TODO: packets received after failed attempt(s), and packets received without any retries, are not compiled yet
            continue

        # Count the occurrences of each failure mode for this packet
        drop_reasons = pkt_drops["Packet_Drop_Reason"] # Pkt drop reasons at GW and Rx
        drop_reasons_tx = pkt_drops_tx["Packet_Drop_Reason"] # Pkt drop reasons at Tx
        incorrect_rcvd = int((drop_reasons == "INCORRECTLY_RECEIVED").sum())
        queue_overflow = int((drop_reasons_tx == "QUEUE_OVERFLOW").sum())
        interface_down = int((drop_reasons_tx == "INTERFACE_DOWN").sum())
        num_drops = len(drop_reasons) # Drops at GW and Rx only

        # There may not always be a U2U communication: treat the packet as
        # two-hop only when at least one drop carries a U2U measurement
        if pkt_drops["U2U_SINR"].notna().any():
            u2u_sinr = pkt_drops["U2U_SINR"].mean()
            u2u_ber = pkt_drops["U2U_BER"].mean()
            hop_count = 2
        else:
            u2u_sinr = None
            u2u_ber = None
            hop_count = 1

        failed_rows.append({
            'RxTime': pkt_drops["RxTime"].max(),
            'TxTime': pkt_drops["TxTime"].min(),
            'Packet_Name': packetName,
            'Bytes': row["Bytes"],
            'RSSI': pkt_drops["RSSI"].mean(),
            'U2G_SINR': pkt_drops["U2G_SINR"].mean(),
            'U2U_SINR': u2u_sinr,
            'U2G_BER': pkt_drops["U2G_BER"].mean(),
            'U2U_BER': u2u_ber,
            'Hop_Count': hop_count,
            'Delay': pkt_drops["Delay"].max(),
            'Queueing_Time': pkt_drops["Queueing_Time"].max(),
            'Backoff_Time': pkt_drops["Backoff_Time"].max(),
            'U2G_Distance': pkt_drops["U2G_Distance"].max(),
            'Incorrectly_Rcvd': incorrect_rcvd,
            'Queue_Overflow': queue_overflow,
            'Interface_Down': interface_down,
            'Number_Dropped': num_drops,
            'Packet_State': "FAILED", # The packet failed to be received (RETRY_LIMIT_EXCEEDED)
        })

    # Built once from the collected rows; an empty result still has all the columns
    pkt_df = pd.DataFrame(failed_rows, columns = pkt_columns)
    return pkt_df


def process_throughput(df, timeDiv):
    '''
    Function to calculate throughput data for a DataFrame
    timeDiv is the time division to use for calculating the throughput

    The time axis is cut into bins [i*timeDiv, (i+1)*timeDiv) starting at 0. Every row gets, in the "Throughput"
    column, the sum of "Bytes" of the rows in its bin whose Packet_State is "RECEIVED", divided by timeDiv.
    Rows with a missing or negative RxTime are left as NaN.
    The column is written into df itself, which is also returned.
    '''
    df["Throughput"] = np.nan
    max_time = float(df["RxTime"].max())
    if math.isnan(max_time):
        # Empty frame (or no usable RxTime): nothing to bin
        return df

    # The bin holding max_time must be included, even when max_time lies exactly on a bin edge
    num_bins = int(max_time // timeDiv) + 1
    while num_bins * timeDiv <= max_time: # Guard against rounding in the floor division
        num_bins += 1

    is_received = df["Packet_State"] == "RECEIVED"
    for i in range(num_bins):
        in_bin = (df["RxTime"] >= (i*timeDiv)) & (df["RxTime"] < ((i+1)*timeDiv))
        totalBytes = df.loc[in_bin & is_received, "Bytes"].sum()
        df.loc[in_bin, "Throughput"] = totalBytes / timeDiv
    return df

def _label_scenario_data(link_data, height, inter_uav_distance, num_member, sending_interval, delay_threshold):
    # Add the scenario parameters, the reliability flags and the throughput to one link's (UL or DL) dataframe
    link_data["Height"] = height
    link_data["Inter_UAV_Distance"] = inter_uav_distance
    link_data["Num_Members"] = num_member
    link_data["Sending_Interval"] = sending_interval
    # Fill in reliability data: reliable = received and within the delay threshold
    link_data["Delay_Exceeded"] = 0
    link_data.loc[link_data["Delay"] > delay_threshold, "Delay_Exceeded"] = 1
    link_data["Reliable"] = 0
    link_data.loc[(link_data["Delay_Exceeded"] == 0) & (link_data["Packet_State"] == "RECEIVED"), "Reliable"] = 1
    return process_throughput(link_data, 1)


def process_sim_data(sim_root_path, delay_threshold):
    '''
    Concatenates all UL & DL results from sim_root_path into a single df per direction
    Input: sim_root_path - folder holding the CSV files of all scenarios; one scenario is detected per "*GCS-Tx.csv" file
           delay_threshold - packets with a Delay above this value are flagged Delay_Exceeded and are not Reliable
    Output: dl_df, ul_df - downlink and uplink dataframes over all scenarios
    The scenario name is expected to hold "_"-separated "<label>-<value>" fields, of which field 0 (number of members),
    1 (inter-UAV distance), 2 (height) and 5 (sending interval) are read as integers.
    '''
    # Get list of "unique" scenarios: file name without the trailing "_GCS-Tx.csv" (11 characters).
    # basename() is used so that the name is extracted whatever path separator glob returns
    scenario_list = [os.path.basename(csv)[0:-11] for csv in glob.glob(sim_root_path + "/*GCS-Tx.csv")]

    result_columns = ['RxTime','TxTime','Packet_Name','Bytes','RSSI','SINR','U2G_SINR','U2U_SINR','BER','U2G_BER','U2U_BER',
                      'Hop_Count','Throughput','Delay','Queueing_Time','Backoff_Time','Distance','U2G_Distance','Height','Inter_UAV_Distance',
                      'Num_Members','Sending_Interval','Delay_Exceeded','Reliable','Incorrectly_Rcvd','ARP_Fail','Queue_Overflow','Hop_Limit_Reached',
                      'Interface_Down','Other_Dropped','Number_Dropped','Packet_State']
    # Per-scenario frames are collected and concatenated once at the end; the leading
    # empty frame fixes the column order and keeps the columns when there is no scenario
    dl_frames = [pd.DataFrame(columns = result_columns)] # Downlink
    ul_frames = [pd.DataFrame(columns = result_columns)] # Uplink

    # For each scenario, extract the UL and DL raw data
    for scenario in scenario_list:
        scenario_files = glob.glob(sim_root_path + "/{}_*.csv".format(scenario)) # Get list of csv files belonging to this scenario
        scenario_params = scenario.split('_')
        num_member = int(scenario_params[0].split('-')[-1])
        inter_uav_distance = int(scenario_params[1].split('-')[-1])
        height = int(scenario_params[2].split('-')[-1])
        # swarm_hor_distance = int(scenario_params[3].split('-')[-1]) # Horizontal Swarm Distance
        # swarm_distance = math.sqrt(int(height)**2 + swarm_hor_distance**2)
        # packet_size = int(scenario_params[4].split('-')[-1])
        sending_interval = int(scenario_params[5].split('-')[-1])
        dl_data, ul_data, dl_tx_df, ul_tx_df, gcs_mon_df, uavs_mon_df, gcs_pd_df, uavs_pd_df = compile_micro_sim_data(scenario_files)
        # Convert the RSSI data to np
        dl_data["RSSI"] = rssi_to_np(dl_data["RSSI"])
        ul_data["RSSI"] = rssi_to_np(ul_data["RSSI"])
        gcs_pd_df["RSSI"] = rssi_to_np(gcs_pd_df["RSSI"])
        uavs_pd_df["RSSI"] = rssi_to_np(uavs_pd_df["RSSI"])
        # gcs_mon_df["RSSI"] = rssi_to_np(gcs_mon_df["RSSI"])
        # uavs_mon_df["RSSI"] = rssi_to_np(uavs_mon_df["RSSI"])
        # Process the failed packets data into the main dataframe
        dl_data = process_dropped_packets_v4(dl_tx_df, dl_data, uavs_pd_df)
        ul_data = process_dropped_packets_v4(ul_tx_df, ul_data, gcs_pd_df)
        if dl_data is not None:
            dl_frames.append(_label_scenario_data(dl_data, height, inter_uav_distance, num_member, sending_interval, delay_threshold))
        if ul_data is not None:
            ul_frames.append(_label_scenario_data(ul_data, height, inter_uav_distance, num_member, sending_interval, delay_threshold))

    dl_df = pd.concat(dl_frames, ignore_index=True)
    ul_df = pd.concat(ul_frames, ignore_index=True)
    return dl_df, ul_df

def _label_scenario_data(link_data, height, inter_uav_distance, num_member, sending_interval, delay_threshold):
    '''
    Helper to tag one scenario's UL or DL dataframe with its scenario parameters and reliability flags
    Input: link_data - Dataframe of one link direction (needs the "Delay" and "Packet_State" columns)
           height, inter_uav_distance, num_member, sending_interval - Scenario parameters written to every row
           delay_threshold - Rows whose "Delay" is greater than this value get "Delay_Exceeded" = 1
    Output: the dataframe returned by process_throughput(link_data, 1)
    '''
    link_data["Height"] = height
    link_data["Inter_UAV_Distance"] = inter_uav_distance
    link_data["Num_Members"] = num_member
    link_data["Sending_Interval"] = sending_interval
    # Fill in reliability data
    link_data["Delay_Exceeded"] = 0
    link_data.loc[link_data["Delay"] > delay_threshold, "Delay_Exceeded"] = 1
    # A packet is reliable only if it was received AND it did not exceed the delay threshold
    link_data["Reliable"] = 0
    link_data.loc[(link_data["Delay_Exceeded"] == 0) & (link_data["Packet_State"] == "RECEIVED"), "Reliable"] = 1
    return process_throughput(link_data, 1)

def process_sim_data_v2(sim_root_path, delay_threshold):
    '''
    Function to concatenate all UL & DL results found in sim_root_path into a single df per link direction
    Input: sim_root_path - Folder containing the CSV files generated by the micro-simulations
           delay_threshold - Packets whose "Delay" is greater than this value are flagged as "Delay_Exceeded"
    Output: dl_df, ul_df - Downlink and uplink dataframes (empty, with the expected columns, if no scenario is found)
    '''
    # Get list of "unique" scenarios: the name of each "*GCS-Tx.csv" file without its "_GCS-Tx.csv" ending
    scenario_suffix = "_GCS-Tx.csv"
    scenario_list = [os.path.basename(csv_path)[:-len(scenario_suffix)]
                     for csv_path in glob.glob(sim_root_path + "/*GCS-Tx.csv")]

    # Dataframes to store UL & DL raw data (both share the same columns)
    raw_columns = ['RxTime','TxTime','Packet_Name','Bytes','RSSI','U2G_SINR','U2U_SINR','U2G_BER','U2U_BER',
                   'Hop_Count','Throughput','Delay','Queueing_Time','Backoff_Time','U2G_Distance','Height','Inter_UAV_Distance',
                   'Num_Members','Sending_Interval','Delay_Exceeded','Reliable','Incorrectly_Rcvd','Queue_Overflow',
                   'Interface_Down','Number_Dropped','Packet_State']
    dl_df = pd.DataFrame(columns = raw_columns) # Downlink dataframe
    ul_df = pd.DataFrame(columns = raw_columns) # Uplink dataframe

    # For each scenario, extract the UL and DL raw data
    for scenario in scenario_list:
        scenario_files = glob.glob(sim_root_path + "/{}_*.csv".format(scenario)) # Get list of csv files belonging to this scenario
        # The scenario name is a "_"-separated list of "<Name>-<value>" fields, e.g.
        # NumMember-3_InterUAVDistance-5_Height-50_Distance-0_PacketSize-24_SendingRate-40
        scenario_params = scenario.split('_')
        num_member = int(scenario_params[0].split('-')[-1])
        inter_uav_distance = int(scenario_params[1].split('-')[-1])
        height = int(scenario_params[2].split('-')[-1])
        sending_interval = int(scenario_params[5].split('-')[-1])
        rx_df_list, tx_df_list, pd_df_list, mon_df_list = compile_micro_sim_data_v2(scenario_files)
        # NOTE(review): dl_data and ul_data are read below before they are assigned, and dl_tx_df,
        # ul_tx_df, uavs_pd_df and gcs_pd_df are not defined in this function, so these two calls
        # fail with a NameError as soon as a scenario is found. They still have to be derived from
        # the lists returned by compile_micro_sim_data_v2 - TODO.
        # Process the failed packets data into the main dataframe
        dl_data = process_dropped_packets_v4(dl_tx_df, dl_data, uavs_pd_df)
        ul_data = process_dropped_packets_v4(ul_tx_df, ul_data, gcs_pd_df)
        if dl_data is not None:
            dl_data = _label_scenario_data(dl_data, height, inter_uav_distance, num_member, sending_interval, delay_threshold)
            dl_df = pd.concat([dl_df, dl_data], ignore_index=True)
        if ul_data is not None:
            ul_data = _label_scenario_data(ul_data, height, inter_uav_distance, num_member, sending_interval, delay_threshold)
            ul_df = pd.concat([ul_df, ul_data], ignore_index=True)

    return dl_df, ul_df

Preprocess the data and save it to CSV files

In [54]:
# Compile the raw downlink / uplink data from the "Test2" simulation results
sim_root_path = "/home/research-student/omnetpp_sim_results/Test2"
delay_threshold = 1
dl_df, ul_df = process_sim_data(sim_root_path, delay_threshold=delay_threshold)
# Write each dataframe into the results folder, leaving out the index column
for raw_df, csv_name in ((dl_df, "FANET_downlink_raw.csv"), (ul_df, "FANET_uplink_raw.csv")):
    raw_df.to_csv(os.path.join(sim_root_path, csv_name), index=False)

No packet drop recorded and packet not found in rx_df for packet: UAVData_0-9117. This should not happen
No packet drop recorded and packet not found in rx_df for packet: UAVData_0-9482. This should not happen
No packet drop recorded and packet not found in rx_df for packet: UAVData_0-9644. This should not happen
No packet drop recorded and packet not found in rx_df for packet: GatewayData-7575. This should not happen
No packet drop recorded and packet not found in rx_df for packet: GatewayData-7996. This should not happen
No packet drop recorded and packet not found in rx_df for packet: GatewayData-8368. This should not happen
No packet drop recorded and packet not found in rx_df for packet: GatewayData-8454. This should not happen
No packet drop recorded and packet not found in rx_df for packet: GatewayData-9211. This should not happen
No packet drop recorded and packet not found in rx_df for packet: GatewayData-9587. This should not happen
No packet drop recorded and packet not foun

In [2]:
# Run the v2 pipeline on the "Testing" simulation results
sim_root_path = "/home/research-student/omnetpp_sim_results/Testing"
delay_threshold = 1
# process_sim_data_v2 returns exactly two dataframes (downlink, uplink); unpacking four names
# here would raise "ValueError: not enough values to unpack"
dl_testing_df, ul_testing_df = process_sim_data_v2(sim_root_path, delay_threshold=delay_threshold)

['/home/research-student/omnetpp_sim_results/Testing/NumMember-3_InterUAVDistance-5_Height-50_Distance-0_PacketSize-24_SendingRate-40_UAV-0-Rx.csv', '/home/research-student/omnetpp_sim_results/Testing/NumMember-3_InterUAVDistance-5_Height-50_Distance-0_PacketSize-24_SendingRate-40_UAV-1-Rx.csv', '/home/research-student/omnetpp_sim_results/Testing/NumMember-3_InterUAVDistance-5_Height-50_Distance-0_PacketSize-24_SendingRate-40_UAV-2-Rx.csv']
