Compile the CPT Files for the Test Dataset

In [1]:
import csv
import pandas as pd # for data manipulation 
import numpy as np
import os, sys, glob, math, pickle
import cudf 
from tqdm import tqdm

# Estimates the conditional probability table P(child | parents) that goes into the BBN.
# Accepts any number of parent columns (this notebook passes between two and five).
def cpt_probs(df, child, parents):
    """Return the row-normalised cross-tabulation of `child` against `parents`.

    Parameters
    ----------
    df : DataFrame holding the `child` column and every column named in `parents`.
    child : name of the column whose distribution is estimated.
    parents : list of column names to condition on; they become the index levels.

    Returns
    -------
    DataFrame sorted by index, with one column per `child` value and each row
    normalised over its own total (`normalize='index'`). If anything goes
    wrong, the exception is printed and None is returned instead of raising.
    """
    try:
        parent_columns = []
        for parent_name in parents:
            parent_columns.append(df[parent_name])
        probabilities = pd.crosstab(
            parent_columns,
            df[child],
            rownames=parents,
            colnames=[child],
            margins=False,
            normalize='index',
            dropna=False,
        )
        return probabilities.sort_index()
    except Exception as err:
        # Best-effort: report the problem and let the caller deal with None.
        print(err)
        return None

def cpt_probs_freq(df, child, parents):
    """Return the raw co-occurrence counts of `child` against `parents`.

    Same layout as `cpt_probs` (parents as index levels, one column per
    `child` value, rows sorted by index) but without normalisation, so each
    cell is a count rather than a probability.

    Returns None, after printing the exception, if the table cannot be built.
    """
    try:
        row_keys = [df[name] for name in parents]
        crosstab_options = dict(
            rownames=parents,
            colnames=[child],
            margins=False,
            dropna=False,
        )
        freq_table = pd.crosstab(row_keys, df[child], **crosstab_options)
        freq_table = freq_table.sort_index()
    except Exception as err:
        # Best-effort: report the problem and let the caller deal with None.
        print(err)
        return None
    return freq_table

Get data from HDF5 file

In [2]:
# Link direction to process. It selects the HDF5 file to read, the key inside
# that file, and the sub-directory / file-name fragment of every output below.
data_type = "Downlink"

if data_type == "Downlink":
    df = pd.read_hdf("/home/research-student/omnetpp_sim_results/Test_Dataset_BPSK_6-5Mbps/Taguchi_Test_Cases_downlink.h5", data_type)
elif data_type == "Uplink":
    df = pd.read_hdf("/home/research-student/omnetpp_sim_results/Test_Dataset_BPSK_6-5Mbps/Taguchi_Test_Cases_uplink.h5", data_type)
else:
    # Fail fast: without this branch a typo would leave `df` undefined, or
    # worse, silently reuse a `df` left in the kernel by an earlier run.
    raise ValueError("Unsupported data_type {!r}; expected 'Downlink' or 'Uplink'".format(data_type))

Get Bin Intervals from NPY Files

In [3]:
# Load the bin edges that match the link direction chosen in `data_type`.
# Previously the Downlink files were always loaded, so an Uplink run would have
# been discretised with downlink edges without any warning.
# NOTE(review): the "Uplink" directory and "_ul" suffix mirror the Downlink
# naming and are assumed here — confirm those files exist under that name.
bins_suffix_by_type = {"Downlink": "dl", "Uplink": "ul"}
if data_type not in bins_suffix_by_type:
    raise ValueError("Unsupported data_type {!r}; expected 'Downlink' or 'Uplink'".format(data_type))
bins_dir = "/home/research-student/omnet-fanet/cpt/{}".format(data_type)
bins_suffix = bins_suffix_by_type[data_type]

sinr_bins = np.load("{}/sinr_bins_{}.npy".format(bins_dir, bins_suffix))
ber_bins = np.load("{}/ber_bins_{}.npy".format(bins_dir, bins_suffix))
delay_bins = np.load("{}/delay_bins_{}.npy".format(bins_dir, bins_suffix))
queueing_time_bins = np.load("{}/queueing_time_bins_{}.npy".format(bins_dir, bins_suffix))
throughput_bins = np.load("{}/throughput_bins_{}.npy".format(bins_dir, bins_suffix))
jitter_bins = np.load("{}/jitter_bins_{}.npy".format(bins_dir, bins_suffix))

Compute CPT for Test Dataset

In [4]:
# A packet counts as "delay exceeded" when its Delay is at or above this value.
delay_threshold = 0.04

# Rows without a U2G_SINR reading are missing crucial information; discard them.
df = df[df['U2G_SINR'].notna()]

# Class labels. Most variables use five levels, three of them use only three.
five_levels = ['vs','s','m','l','vl']
three_levels = ['s','m','l']
h_dist_labels = list(five_levels)
height_labels = list(five_levels)
num_members_labels = list(five_levels)
sending_interval_labels = list(five_levels)
pkt_size_labels = list(five_levels)
sinr_labels = list(five_levels)
delay_labels = list(five_levels)
ber_labels = list(five_levels)
throughput_labels = list(three_levels)
queueing_labels = list(three_levels)
jitter_labels = list(three_levels)

# (class column, source column in df, bin edges, labels), in output column order.
# NOTE(review): every cut uses right=False, so a value equal to the last edge
# (e.g. Num_Members == 40) falls outside all bins and becomes NaN — confirm the
# data never reaches those upper edges.
binning_plan = [
    # Independent variables: fixed, hand-picked edges
    ("H_Dist_Class", "U2G_H_Dist", [0,100,200,300,400,501], h_dist_labels),
    ("Height_Class", "Height", [1,25,49,73,97,121], height_labels),
    ("Num_Members_Class", "Num_Members", [2,8,16,24,32,40], num_members_labels),
    ("Sending_Interval_Class", "Mean_Sending_Interval", [40,232,424,616,808,1000], sending_interval_labels),
    ("Packet_Size_Class", "Bytes", [24,248,472,696,920,1144], pkt_size_labels),
    # Second layer: edges loaded from the .npy files
    ("SINR_Class", "U2G_SINR", sinr_bins, sinr_labels),
    ("Delay_Class", "Delay", delay_bins, delay_labels),
    ("Throughput_Class", "Throughput", throughput_bins, throughput_labels),
    ("Queueing_Time_Class", "Queueing_Time", queueing_time_bins, queueing_labels),
    ("BER_Class", "U2G_BER", ber_bins, ber_labels),
    ("Jitter_Class", "Jitter", jitter_bins, jitter_labels),
]

# Separate, smaller frame that only holds the discretised classes and outputs.
classes_df = pd.DataFrame()
for class_column, source_column, bin_edges, class_labels in binning_plan:
    classes_df[class_column] = pd.cut(
        df[source_column],
        bin_edges,
        right=False,
        include_lowest=True,
        labels=class_labels,
    )

# Output variables
classes_df["Reliable"] = df["Packet_State"] == "Reliable"
classes_df["Delay_Exceeded"] = df["Delay"] >= delay_threshold
classes_df["Incorrectly_Received"] = df["Incorrectly_Received"]
classes_df["Queue_Overflow"] = df["Queue_Overflow"]

# Every CPT below is conditioned on the same five independent variables.
parents_1 = ["H_Dist_Class", "Height_Class", "Num_Members_Class", "Sending_Interval_Class", "Packet_Size_Class"]

# Conditional probability tables for the output-layer classes
reliability_cpt, incorrect_rcvd_cpt, delay_exceeded_cpt, queue_overflow_cpt = [
    cpt_probs(classes_df, child=output_column, parents=parents_1)
    for output_column in ("Reliable", "Incorrectly_Received", "Delay_Exceeded", "Queue_Overflow")
]

# Conditional probability tables for the second-layer classes
sinr_cpt, ber_cpt, delay_cpt, queueing_time_cpt, throughput_cpt, jitter_cpt = [
    cpt_probs(classes_df, child=layer_column, parents=parents_1)
    for layer_column in (
        "SINR_Class",
        "BER_Class",
        "Delay_Class",
        "Queueing_Time_Class",
        "Throughput_Class",
        "Jitter_Class",
    )
]

In [63]:
# Raw counts of Reliable vs. the five parent classes.
reliability_cpt_freq = cpt_probs_freq(classes_df, child="Reliable", parents=parents_1)
# Rows whose counts are all zero are parent combinations never observed in the
# test data: blank them out, then drop them.
# `np.nan` is used instead of `np.NaN`, an alias that was removed in NumPy 2.0.
reliability_cpt_freq.loc[(reliability_cpt_freq==0).all(axis=1).values] = np.nan
reliability_cpt_freq.dropna(axis=0, how='all', inplace=True)
reliability_cpt_freq.to_csv("/home/research-student/omnet-fanet/cpt/Test_Data/{}/test_data_reliability_{}_cpt_freq.csv".format(data_type,data_type))
reliability_cpt_freq.to_pickle("/home/research-student/omnet-fanet/cpt/Test_Data/{}/test_data_reliability_{}_cpt_freq.pkl".format(data_type,data_type))

In [73]:
# Raw counts of SINR_Class vs. the five parent classes.
sinr_cptr_freq = cpt_probs_freq(classes_df, child="SINR_Class", parents=parents_1)
# Rows whose counts are all zero are parent combinations never observed in the
# test data: blank them out, then drop them.
# `np.nan` is used instead of `np.NaN`, an alias that was removed in NumPy 2.0.
sinr_cptr_freq.loc[(sinr_cptr_freq==0).all(axis=1).values] = np.nan
sinr_cptr_freq.dropna(axis=0, how='all', inplace=True)
sinr_cptr_freq.to_csv("/home/research-student/omnet-fanet/cpt/Test_Data/{}/test_data_sinr_{}_cpt_freq.csv".format(data_type,data_type))
sinr_cptr_freq.to_pickle("/home/research-student/omnet-fanet/cpt/Test_Data/{}/test_data_sinr_{}_cpt_freq.pkl".format(data_type,data_type))

Drop Empty Rows

In [64]:
# Drop empty rows
# Output layers
# A row that is zero in every column is a parent combination with no
# observations. Each table is modified in place: such rows are blanked to NaN
# and then dropped. `np.nan` replaces `np.NaN`, which NumPy 2.0 removed.
for _output_cpt in (reliability_cpt, incorrect_rcvd_cpt, delay_exceeded_cpt, queue_overflow_cpt):
    _output_cpt.loc[(_output_cpt==0).all(axis=1).values] = np.nan
    _output_cpt.dropna(axis=0, how='all', inplace=True)

In [65]:
# Drop empty rows
# Second layers
# A row that is zero in every column is a parent combination with no
# observations. Each table is modified in place: such rows are blanked to NaN
# and then dropped. `np.nan` replaces `np.NaN`, which NumPy 2.0 removed.
for _layer_cpt in (sinr_cpt, ber_cpt, delay_cpt, queueing_time_cpt, throughput_cpt, jitter_cpt):
    _layer_cpt.loc[(_layer_cpt==0).all(axis=1).values] = np.nan
    _layer_cpt.dropna(axis=0, how='all', inplace=True)

Save Test Data CPT

In [66]:
# As CSV files
# Key = name fragment used in the output file name, value = table to write.
# Keeping the pairs in one mapping means a table cannot be written under
# another table's file name by a copy-paste slip.
cpt_tables_for_csv = {
    "reliability": reliability_cpt,
    "incorrect_rcvd": incorrect_rcvd_cpt,
    "delay_exceeded": delay_exceeded_cpt,
    "queue_overflow": queue_overflow_cpt,
    "sinr": sinr_cpt,
    "ber": ber_cpt,
    "delay": delay_cpt,
    "queueing_time": queueing_time_cpt,
    "throughput": throughput_cpt,
    "jitter": jitter_cpt,
}
csv_output_dir = "/home/research-student/omnet-fanet/cpt/Test_Data/{}".format(data_type)
for _csv_name, _csv_table in cpt_tables_for_csv.items():
    # Resulting path: <dir>/test_data_<name>_<data_type>_cpt.csv
    _csv_table.to_csv("{}/test_data_{}_{}_cpt.csv".format(csv_output_dir, _csv_name, data_type))

In [68]:
# As pickle files
# Key = name fragment used in the output file name, value = table to write.
# Keeping the pairs in one mapping means a table cannot be written under
# another table's file name by a copy-paste slip.
cpt_tables_for_pickle = {
    "reliability": reliability_cpt,
    "incorrect_rcvd": incorrect_rcvd_cpt,
    "delay_exceeded": delay_exceeded_cpt,
    "queue_overflow": queue_overflow_cpt,
    "sinr": sinr_cpt,
    "ber": ber_cpt,
    "delay": delay_cpt,
    "queueing_time": queueing_time_cpt,
    "throughput": throughput_cpt,
    "jitter": jitter_cpt,
}
pickle_output_dir = "/home/research-student/omnet-fanet/cpt/Test_Data/{}".format(data_type)
for _pkl_name, _pkl_table in cpt_tables_for_pickle.items():
    # Resulting path: <dir>/test_data_<name>_<data_type>_cpt.pkl
    _pkl_table.to_pickle("{}/test_data_{}_{}_cpt.pkl".format(pickle_output_dir, _pkl_name, data_type))

Extras: Incorrectly-Received CPT with a Reduced Parent Set (H_Dist, Height)

In [5]:
# Same Incorrectly_Received CPT as above, conditioned on only two of the five parents.
parents_trimmed = ["H_Dist_Class", "Height_Class"] 
incorrect_rcvd_trimmed_cpt = cpt_probs(classes_df, child="Incorrectly_Received", parents=parents_trimmed)
# Blank out and drop rows that are zero in every column (parent combinations
# with no observations). `np.nan` replaces `np.NaN`, which NumPy 2.0 removed.
incorrect_rcvd_trimmed_cpt.loc[(incorrect_rcvd_trimmed_cpt==0).all(axis=1).values] = np.nan
incorrect_rcvd_trimmed_cpt.dropna(axis=0, how='all', inplace=True)
incorrect_rcvd_trimmed_cpt.to_csv("/home/research-student/omnet-fanet/cpt/Test_Data/{}/test_data_incorrect_rcvd_trimmed_{}_cpt.csv".format(data_type,data_type))
incorrect_rcvd_trimmed_cpt.to_pickle("/home/research-student/omnet-fanet/cpt/Test_Data/{}/test_data_incorrect_rcvd_trimmed_{}_cpt.pkl".format(data_type,data_type))