Date: 19/06/2023
Desc: Debug the missing CPT (conditional probability table) values in the Bayesian network (BN) CPTs.

# Import

In [19]:
import pandas as pd
import numpy as np 
import os
import math
import pickle

# This function helps to calculate probability distribution, which goes into BBN (note, can handle up to 2 parents)
def cpt_probs(df, child, parents):
    """Return the conditional probability table of ``child`` given ``parents``.

    The table is a ``pd.crosstab`` of the parent columns (rows) against the
    child column (columns), normalised per row (``normalize='index'``) and
    sorted by its index.

    Args:
        df: DataFrame holding the child column and every parent column.
        child: Name of the child column.
        parents: List of parent column names; also used as the row-level names.

    Returns:
        The row-normalised crosstab as a DataFrame, or ``None`` if anything
        raised while building it (the error is printed, not re-raised).
    """
    try:
        # One Series per parent; a missing column raises here and is reported below.
        parent_columns = []
        for parent_name in parents:
            parent_columns.append(df[parent_name])

        table = pd.crosstab(
            index=parent_columns,
            columns=df[child],
            rownames=parents,
            colnames=[child],
            margins=False,
            normalize='index',
            dropna=False,
        )
        # NOTE: an earlier variant flattened this via .to_numpy().reshape(-1).tolist();
        # the sorted DataFrame itself is returned here.
        return table.sort_index()
    except Exception as err:
        print(err)
        return None

# Load Dataset

In [20]:
# Column dtypes handed to pd.read_csv. "U2G_H_Dist" has no entry here, so its
# dtype is left for pandas to infer. (The original literal listed
# "UAV_Sending_Interval" twice with the same value; the duplicate is removed.)
df_dtypes = {
    "TxTime": np.float32,
    "U2G_Distance": np.float32,
    "Height": np.int16,
    "Num_Members": np.int16,
    "UAV_Sending_Interval": np.int16,
    "Bytes": np.int16,
    "U2G_SINR": np.float32,
    "U2G_BER": np.float32,
    "Delay": np.float32,
    "Throughput": np.float32,
    "Queueing_Time": np.float32,
    "Packet_State": 'category',
    "Retry_Count": np.int8,
    "Incorrectly_Received": np.int8,
    "Queue_Overflow": np.int8,
    "Packet_Name": 'string',
    "Mean_SINR": np.float32,
    "Std_Dev_SINR": np.float32,
}

# Load test dataset ==========================================================================================================================
# The four holdout files differ only in the modulation prefix of the file name,
# so they are read in one loop instead of four copy-pasted calls.
holdout_dir = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_Video"
holdout_cols = ["Mean_SINR", "Std_Dev_SINR", "Num_Members", "UAV_Sending_Interval", "Packet_State", "Delay", "U2G_H_Dist", "Height"]

holdout_frames = {}
for modulation in ("BPSK", "QPSK", "QAM16", "QAM64"):
    frame = pd.read_csv(
        f"{holdout_dir}/{modulation}_processed_holdout_uplink.csv",
        usecols=holdout_cols,
        dtype=df_dtypes,
    )
    # Tag every row with the modulation scheme of the file it came from.
    frame["Modulation"] = modulation
    holdout_frames[modulation] = frame

# Keep the per-modulation names available for any later cell that uses them.
ul_df_bpsk = holdout_frames["BPSK"]
ul_df_qpsk = holdout_frames["QPSK"]
ul_df_qam16 = holdout_frames["QAM16"]
ul_df_qam64 = holdout_frames["QAM64"]

ul_df = pd.concat([ul_df_bpsk, ul_df_qpsk, ul_df_qam16, ul_df_qam64], ignore_index=True)
# Load test dataset ==========================================================================================================================

In [3]:
# Flag whether the horizontal-distance or height columns contain any missing values.
h_dist_nan, height_nan = (
    ul_df[column].isna().to_numpy().any() for column in ('U2G_H_Dist', 'Height')
)
print(h_dist_nan, height_nan)

False False


# Discretize Dataset

In [21]:
# Define ranges of input parameters
max_height = 300
min_height = 60
max_h_dist = 1200
min_h_dist = 0

# Normalize data (Min Max Normalization between [-1,1]).
# Done as vectorised column arithmetic rather than a per-row Python lambda; the
# columns are cast to float64 first so the arithmetic matches what the lambda
# computed on Python numbers and cannot overflow the int16 "Height" column.
ul_df["Height_Norm"] = 2 * (ul_df["Height"].astype(np.float64) - min_height) / (max_height - min_height) - 1
ul_df["U2G_H_Dist_Norm"] = 2 * (ul_df["U2G_H_Dist"].astype(np.float64) - min_h_dist) / (max_h_dist - min_h_dist) - 1
# Map the sending-interval values onto class labels; values not in the mapping are left unchanged.
ul_df["UAV_Sending_Interval_Class"] = ul_df["UAV_Sending_Interval"].replace({10:'vs', 20:'s', 40:'m', 100:'l', 1000:'vl'})
# ul_df['Packet_State'] = ul_df['Packet_State'].replace({"Reliable":0, "QUEUE_OVERFLOW":1, "RETRY_LIMIT_REACHED":2, "Delay_Exceeded":3})

# Discretize the normalized horizontal distance and height into labelled classes.
# Horizontal distance: 61 classes labelled "1".."61".
h_dist_num_classes = 61
h_dist_labels = [str(num) for num in range(1, h_dist_num_classes + 1)]
uav_swarm_radius = 5
h_dist_bnd_offset = 2 * uav_swarm_radius / max_h_dist
h_dist_class_bnd = np.linspace(-1, 1, h_dist_num_classes, endpoint=True)
h_dist_class_bnd[1:] -= h_dist_bnd_offset  # Offset boundaries by radius (first boundary is overwritten below)
h_dist_class_bnd = h_dist_class_bnd.tolist()
h_dist_class_bnd.append(2)  # Appending 2 to catch normalized inputs above 1
h_dist_class_bnd[0] = -2  # Making the lowest boundary -2 to catch normalized inputs below -1

# Height: 9 classes labelled "1".."9".
height_num_classes = 9
height_labels = [str(num) for num in range(1, height_num_classes + 1)]
height_class_bnd = np.linspace(-1, 1, height_num_classes, endpoint=True).tolist()
height_class_bnd.append(2)  # Appending 2 to catch normalized inputs above 1
height_class_bnd[0] = -2  # Making the lowest boundary -2 to catch normalized inputs below -1

# Bins are left-closed / right-open (right=False); each has one more boundary than labels.
ul_df["U2G_H_Dist_Class"] = pd.cut(ul_df["U2G_H_Dist_Norm"], h_dist_class_bnd, right=False, include_lowest=True, labels=h_dist_labels)
ul_df["Height_Class"] = pd.cut(ul_df["Height_Norm"], height_class_bnd, right=False, include_lowest=True, labels=height_labels)

# Compute CPT

In [22]:
# Parent columns that condition Packet_State in the CPT.
parents_pkt_state = [
    "U2G_H_Dist_Class",
    "Height_Class",
    "UAV_Sending_Interval_Class",
    "Modulation",
]
# cpt_probs prints the error and returns None on failure, so check the result before use.
pkt_state_cpt = cpt_probs(df=ul_df, child="Packet_State", parents=parents_pkt_state)

# Validate CPT

In [30]:
# The below should be zero: count CPT rows whose probability is 0 for every packet state.
pkt_state_names = ("Delay_Exceeded", "QUEUE_OVERFLOW", "RETRY_LIMIT_REACHED", "Reliable")
all_zero_rows = np.logical_and.reduce([pkt_state_cpt[state] == 0 for state in pkt_state_names])
len(pkt_state_cpt.loc[all_zero_rows])

0