In [25]:
import csv
import pandas as pd # for data manipulation 
import numpy as np
import networkx as nx # for drawing graphs
import matplotlib.pyplot as plt # for drawing graphs
import os, sys, glob, math, pickle

# Builds the conditional probability table (CPT) that feeds the BBN.
# Works for any number of parent columns (it is called below with 5 and 6 parents).
def cpt_probs(df, child, parents):
    """Estimate P(child | parents) from the rows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding one column per name in ``parents`` plus the ``child`` column.
    child : str
        Name of the column whose distribution is estimated.
    parents : list of str
        Names of the conditioning columns; they become the row index levels.

    Returns
    -------
    pandas.DataFrame or None
        Cross-tabulation with one row per parent combination and one column per
        child value, each row normalised by its own total (``normalize='index'``),
        sorted by index. If anything raises while building the table, the error
        is printed and ``None`` is returned instead.
    """
    try:
        parent_columns = []
        for parent_name in parents:
            parent_columns.append(df[parent_name])
        table = pd.crosstab(
            index=parent_columns,
            columns=df[child],
            rownames=parents,
            colnames=[child],
            margins=False,
            normalize='index',
            dropna=False,
        )
        return table.sort_index()
    except Exception as err:
        # Best-effort: report the problem and let the caller deal with None.
        print(err)
        return None

def euclidean_dist(row):
    """Return sqrt(U2G_Distance**2 + Height**2) for a single data row.

    ``row`` is anything indexable by the keys ``"U2G_Distance"`` and
    ``"Height"`` (e.g. one row handed over by ``DataFrame.apply(axis=1)``).
    """
    ground_component = row["U2G_Distance"] ** 2
    height_component = row["Height"] ** 2
    return math.sqrt(ground_component + height_component)

In [2]:
# Load the raw downlink results.
# NOTE(review): absolute, machine-specific path - consider moving it to a config cell.
csv_data_path = "/home/research-student/omnetpp_sim_results/No_ARP_CSV"
downlink_csv = os.path.join(csv_data_path, "FANET_downlink_raw.csv")
downlink_df = pd.read_csv(downlink_csv)

# Add some extra values that were left out of the pre-processing step.
# Distance combining U2G_Distance and Height: same formula as euclidean_dist(),
# but evaluated on whole columns at once instead of a Python call per row.
downlink_df["U2G_H_Dist"] = np.sqrt(downlink_df["U2G_Distance"] ** 2 + downlink_df["Height"] ** 2)

# Jitter = delay of a row minus the delay of the row before it; the first row
# has no predecessor, so its jitter is set to 0.
# NOTE(review): the difference is taken over consecutive rows of the whole file -
# confirm the rows are ordered so that neighbouring rows belong together.
e2e_delay = downlink_df["Delay"].to_numpy()
jitter = np.insert(np.diff(e2e_delay), 0, 0)
downlink_df["Jitter"] = jitter

In [56]:
# Step 1: discretise the continuous values into classes.
# Label sets, listed from the lowest bin to the highest.
h_dist_labels = ['vs', 's', 'm', 'l', 'vl']
height_labels = ['s', 'm', 'l']
num_members_labels = ['s', 'm', 'l']
sending_interval_labels = ['s', 'm', 'l']
pkt_size_labels = ['s', 'm', 'l']
sinr_labels = ['vs', 's', 'm', 'l', 'vl']
delay_labels = ['vs', 's', 'm', 'l', 'vl']
throughput_labels = ['s', 'l']
queueing_labels = ['s', 'l']
ber_labels = ['vs', 's', 'm', 'l', 'vl']
jitter_labels = ['s', 'm', 'l']

# Independent vars: equal-width bins (pd.cut), one bin per label.
for _class_col, _source_col, _labels in [
    ("H_Dist_Class", "U2G_H_Dist", h_dist_labels),
    ("Height_Class", "Height", height_labels),
    ("Num_Members_Class", "Num_Members", num_members_labels),
    ("Sending_Interval_Class", "Sending_Interval", sending_interval_labels),
    ("Packet_Size_Class", "Bytes", pkt_size_labels),
]:
    downlink_df[_class_col] = pd.cut(downlink_df[_source_col], bins=len(_labels), labels=_labels)

# Second layer: quantile bins (pd.qcut).
# NOTE(review): Throughput and Queueing_Time ask for 3 quantiles but supply 2 labels;
# that only fits when duplicates='drop' removes exactly one repeated bin edge - confirm
# this still holds if the input data changes.
for _class_col, _source_col, _quantiles, _labels, _duplicates in [
    ("SINR_Class", "U2G_SINR", 5, sinr_labels, "raise"),
    ("Delay_Class", "Delay", 5, delay_labels, "raise"),
    ("Throughput_Class", "Throughput", 3, throughput_labels, "drop"),
    ("Queueing_Time_Class", "Queueing_Time", 3, queueing_labels, "drop"),
    ("BER_Class", "U2G_BER", 5, ber_labels, "raise"),
    ("Jitter_Class", "Jitter", 3, jitter_labels, "raise"),
]:
    downlink_df[_class_col] = pd.qcut(
        downlink_df[_source_col], q=_quantiles, labels=_labels, duplicates=_duplicates
    )

In [57]:
# Append one placeholder row so that the "unknown" class ("U") shows up as a value
# of every class column; the three outcome columns are set to 0 for that row.
lastRow = pd.DataFrame([{
    **dict.fromkeys(
        [
            "H_Dist_Class", "Height_Class", "Num_Members_Class", "Sending_Interval_Class",
            "Packet_Size_Class", "SINR_Class", "Delay_Class", "Throughput_Class",
            "Queueing_Time_Class", "BER_Class", "Jitter_Class",
        ],
        "U",
    ),
    "Reliable": 0,
    "Delay_Exceeded": 0,
    "Number_Dropped": 0,
}])
# Every column not listed above is left empty (NaN) in the placeholder row.
downlink_df = pd.concat([downlink_df, lastRow], ignore_index=True)

In [59]:
# Calculate the conditional probability table (CPT) of every node in the network.
UNKNOWN_LABEL = "U"  # class assigned to parent combinations that are not seen in the data


def _add_unknown_label(label_lists):
    """Append the unknown class to each label list, at most once per list.

    Guarding the append keeps this cell safe to re-run: a second "U" in a list
    would duplicate rows/columns when the CPTs are re-indexed below.
    """
    for label_list in label_lists:
        if UNKNOWN_LABEL not in label_list:
            label_list.append(UNKNOWN_LABEL)


def _build_cpt(df, child, parents, parents_labels, child_labels):
    """Build the CPT of ``child`` given ``parents`` with a complete, ordered layout.

    Parameters
    ----------
    df : pandas.DataFrame
        Data passed straight to ``cpt_probs``.
    child : str
        Column whose conditional distribution is tabulated.
    parents : list of str
        Conditioning columns (row index levels of the result).
    parents_labels : list of list
        Allowed labels per parent, in the order the rows must appear.
    child_labels : list
        Column order of the result; must contain ``UNKNOWN_LABEL``.

    Returns
    -------
    pandas.DataFrame
        One row per element of the cartesian product of ``parents_labels`` and one
        column per entry of ``child_labels``. Rows with no probability mass on any
        observed child label get probability 1 on the unknown class.

    Raises
    ------
    ValueError
        If ``cpt_probs`` failed and returned ``None``.
    """
    cpt = cpt_probs(df, child=child, parents=parents)
    if cpt is None:
        raise ValueError(f"cpt_probs failed for child {child!r}; see the error printed above")

    observed_labels = [label for label in child_labels if label != UNKNOWN_LABEL]
    cpt = cpt.copy()
    cpt[UNKNOWN_LABEL] = 0
    # Rearrange rows and columns of the cpt. This is important when building the Bayesian Network.
    full_index = pd.MultiIndex.from_product(parents_labels, names=parents)
    # Combinations missing from the crosstab altogether come back from reindex as
    # all-NaN rows; zero them so they are treated as "not seen" as well.
    cpt = cpt.reindex(full_index)[child_labels].fillna(0)
    not_seen = (cpt[observed_labels] == 0).all(axis=1)
    cpt.loc[not_seen, UNKNOWN_LABEL] = 1
    return cpt


parents_1 = ["H_Dist_Class", "Height_Class", "Num_Members_Class", "Sending_Interval_Class", "Packet_Size_Class"]
parents_2 = ["SINR_Class", "Delay_Class", "Throughput_Class", "Queueing_Time_Class", "BER_Class", "Jitter_Class"]

# Add unknown "U" class for cases that are not seen in data
_add_unknown_label([
    h_dist_labels, height_labels, num_members_labels, sending_interval_labels, pkt_size_labels,
    sinr_labels, delay_labels, throughput_labels, queueing_labels, ber_labels, jitter_labels,
])
parents_1_labels = [h_dist_labels, height_labels, num_members_labels, sending_interval_labels, pkt_size_labels]
parents_2_labels = [sinr_labels, delay_labels, throughput_labels, queueing_labels, ber_labels, jitter_labels]
reliability_labels = [0, 1, UNKNOWN_LABEL]
delay_exceeded_labels = [0, 1, UNKNOWN_LABEL]
num_dropped_labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, UNKNOWN_LABEL]

# Second-layer nodes, conditioned on the independent variables.
sinr_cpt = _build_cpt(downlink_df, "SINR_Class", parents_1, parents_1_labels, sinr_labels)
delay_cpt = _build_cpt(downlink_df, "Delay_Class", parents_1, parents_1_labels, delay_labels)
throughput_cpt = _build_cpt(downlink_df, "Throughput_Class", parents_1, parents_1_labels, throughput_labels)
queueing_cpt = _build_cpt(downlink_df, "Queueing_Time_Class", parents_1, parents_1_labels, queueing_labels)
ber_cpt = _build_cpt(downlink_df, "BER_Class", parents_1, parents_1_labels, ber_labels)
jitter_cpt = _build_cpt(downlink_df, "Jitter_Class", parents_1, parents_1_labels, jitter_labels)

# Outcome nodes, conditioned on the second-layer classes.
reliability_cpt = _build_cpt(downlink_df, "Reliable", parents_2, parents_2_labels, reliability_labels)
delay_exceeded_cpt = _build_cpt(downlink_df, "Delay_Exceeded", parents_2, parents_2_labels, delay_exceeded_labels)
num_dropped_cpt = _build_cpt(downlink_df, "Number_Dropped", parents_2, parents_2_labels, num_dropped_labels)

In [61]:
# Persist each CPT as a pickle file (paths are relative to the working directory).
for _pickle_name, _cpt_table in [
    ("sinr_cpt.pkl", sinr_cpt),
    ("delay_cpt.pkl", delay_cpt),
    ("throughput_cpt.pkl", throughput_cpt),
    ("queueing_cpt.pkl", queueing_cpt),
    ("ber_cpt.pkl", ber_cpt),
    ("jitter_cpt.pkl", jitter_cpt),
    ("reliability_cpt.pkl", reliability_cpt),
    ("delay_exceeded_cpt.pkl", delay_exceeded_cpt),
    ("num_dropped_cpt.pkl", num_dropped_cpt),
]:
    _cpt_table.to_pickle(_pickle_name)

In [60]:
# Display the SINR CPT: rows are parent-class combinations, columns are SINR classes plus "U".
sinr_cpt

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SINR_Class,vs,s,m,l,vl,U
H_Dist_Class,Height_Class,Num_Members_Class,Sending_Interval_Class,Packet_Size_Class,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
vs,s,s,s,s,0.000146,0.001360,0.014965,0.152026,0.831503,0.0
vs,s,s,s,m,0.000146,0.000951,0.017767,0.135922,0.845215,0.0
vs,s,s,s,l,0.000147,0.001027,0.014302,0.125202,0.859322,0.0
vs,s,s,s,U,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
vs,s,s,m,s,0.000000,0.001583,0.021589,0.133995,0.842832,0.0
...,...,...,...,...,...,...,...,...,...,...
U,U,U,l,U,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
U,U,U,U,s,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
U,U,U,U,m,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
U,U,U,U,l,0.000000,0.000000,0.000000,0.000000,0.000000,1.0


In [58]:
# Inspect the last rows of the frame (once the dummy row has been appended, it is the final row).
downlink_df.tail()

Unnamed: 0,RxTime,TxTime,Packet_Name,Bytes,RSSI,SINR,U2G_SINR,U2U_SINR,BER,U2G_BER,...,Height_Class,Num_Members_Class,Sending_Interval_Class,Packet_Size_Class,SINR_Class,Delay_Class,Throughput_Class,Queueing_Time_Class,BER_Class,Jitter_Class
2960971,509.662415,509.6,CNCData-5096,900.0,5.35094e-12,1.130248,1.130248,,0.239542,0.239542,...,m,s,s,l,vs,m,s,s,vl,m
2960972,509.746915,509.7,CNCData-5097,900.0,7.573115e-12,2.014906,2.014906,,0.224337,0.224337,...,m,s,s,l,vs,s,s,s,vl,s
2960973,509.85384,509.8,CNCData-5098,900.0,8.33782e-12,2.319345,2.319345,,0.224824,0.224824,...,m,s,s,l,vs,s,s,s,vl,m
2960974,509.94992,509.9,CNCData-5099,900.0,8.962003e-12,2.56784,2.56784,,0.212293,0.212293,...,m,s,s,l,vs,s,s,s,vl,m
2960975,,,,,,,,,,,...,U,U,U,U,U,U,U,U,U,U


In [44]:
# Remove the final row of the frame in place (used to discard the appended dummy "U" row).
# Not idempotent: every execution removes one more row, so run it once per appended dummy row.
downlink_df.drop(index=downlink_df.index[-1:], inplace=True)