## Multinomial BN Model for UAV Comm. Reliability Prediction

## Imports

In [1]:
import pandas as pd
import numpy as np 
import math
import os
import pickle
import gc 
from tqdm import tqdm
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

def generate_reliability_dataset(dataset_details_df, test_split=0.2):
    # df_dtypes = {"Horizontal_Distance": np.float64, "Height": np.int16,	"U2G_Distance": np.int32, "UAV_Sending_Interval": np.float64, "Mean_SINR": np.float64, "Std_Dev_SINR": np.float64,
    #              "Modulation": 'string', "Num_Sent": np.int32, "Num_Reliable": np.int32, "Num_Delay_Excd": np.int32, "Num_Incr_Rcvd": np.int32, "Num_Q_Overflow": np.int32}
    # dataset_details = pd.read_csv(dataset_details_csv, 
    #                               usecols = ["Mean_SINR", "Std_Dev_SINR", "UAV_Sending_Interval", "Modulation", "Num_Sent", "Num_Reliable", "Num_Delay_Excd",
    #                                          "Num_Incr_Rcvd", "Num_Q_Overflow"],
    #                               dtype=df_dtypes)
    df_train_list = []
    for row in tqdm(dataset_details_df.itertuples()):
        mean_sinr = row.Mean_SINR_Class
        std_dev_sinr = row.Std_Dev_SINR_Class
        uav_send_int = row.UAV_Sending_Interval_Class
        mcs = row.MCS
        num_reliable = row.Num_Reliable
        num_delay_excd = row.Num_Delay_Excd
        num_incr_rcvd = row.Num_Incr_Rcvd
        num_q_overflow = row.Num_Q_Overflow

        if num_reliable > 0:
            reliable_packets = pd.DataFrame({"Mean_SINR_Class": mean_sinr, "Std_Dev_SINR_Class": std_dev_sinr, "UAV_Sending_Interval_Class": uav_send_int, "MCS": mcs, "Packet_State": 0}, index=[0])
            reliable_packets = reliable_packets.loc[reliable_packets.index.repeat(num_reliable)]
        else:
            reliable_packets = pd.DataFrame({})

        if num_delay_excd > 0:
            delay_excd_packets = pd.DataFrame({"Mean_SINR_Class": mean_sinr, "Std_Dev_SINR_Class": std_dev_sinr, "UAV_Sending_Interval_Class": uav_send_int, "MCS": mcs, "Packet_State": 1}, index=[0])
            delay_excd_packets = delay_excd_packets.loc[delay_excd_packets.index.repeat(num_delay_excd)]
        else:
            delay_excd_packets = pd.DataFrame({})

        if num_q_overflow > 0:
            q_overflow_packets = pd.DataFrame({"Mean_SINR_Class": mean_sinr, "Std_Dev_SINR_Class": std_dev_sinr, "UAV_Sending_Interval_Class": uav_send_int, "MCS": mcs, "Packet_State": 2}, index=[0])
            q_overflow_packets = q_overflow_packets.loc[q_overflow_packets.index.repeat(num_q_overflow)]
        else:
            q_overflow_packets = pd.DataFrame({})

        if num_incr_rcvd > 0:
            incr_rcvd_packets = pd.DataFrame({"Mean_SINR_Class": mean_sinr, "Std_Dev_SINR_Class": std_dev_sinr, "UAV_Sending_Interval_Class": uav_send_int, "MCS": mcs, "Packet_State": 3}, index=[0])
            incr_rcvd_packets = incr_rcvd_packets.loc[incr_rcvd_packets.index.repeat(num_incr_rcvd)]
        else:
            incr_rcvd_packets = pd.DataFrame({})
        df_train_list.append(pd.concat([reliable_packets, delay_excd_packets, q_overflow_packets, incr_rcvd_packets]))

    df_train = pd.concat(df_train_list)
    return df_train

def get_mcs_index(df_in):
    '''
    Gets the MCS index based on modulation and bitrate column of the df_in
    '''
    df = df_in.copy()
    df["MCS"] = ''
    df.loc[(df["Modulation"] == "BPSK") & (df["Bitrate"] == 6.5), "MCS"] = 0 # MCS Index 0
    df.loc[(df["Modulation"] == "QPSK") & (df["Bitrate"] == 13), "MCS"] = 1 # MCS Index 0
    df.loc[(df["Modulation"] == "QPSK") & (df["Bitrate"] == 19.5), "MCS"] = 2 # MCS Index 0
    df.loc[(df["Modulation"] == "QAM16") & (df["Bitrate"] == 26), "MCS"] = 3 # MCS Index 0
    df.loc[(df["Modulation"] == "QAM16") & (df["Bitrate"] == 39), "MCS"] = 4 # MCS Index 0
    df.loc[(df["Modulation"] == "QAM64") & (df["Bitrate"] == 52), "MCS"] = 5 # MCS Index 0
    df.loc[(df["Modulation"] == "QAM64") & (df["Bitrate"] == 58.5), "MCS"] = 6 # MCS Index 0
    df.loc[(df["Modulation"] == "QAM64") & (df["Bitrate"] == 65), "MCS"] = 7 # MCS Index 0

    return df

## Data Processing

In [2]:
DATASET_PATH = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_DJISpark/data_processed/DJI_Spark_Downlink_Reliability.csv"

df_dtypes = {"Horizontal_Distance": np.float64, "Height": np.int16,	"U2G_Distance": np.int32, "UAV_Sending_Interval": np.float64, "Mean_SINR": np.float64, "Std_Dev_SINR": np.float64,
            "Num_Sent": np.int32, "Num_Reliable": np.int32, "Num_Delay_Excd": np.int32, "Num_Incr_Rcvd": np.int32, "Num_Q_Overflow": np.int32, "Modulation": str, "Bitrate": np.float64}
dataset_details_df = pd.read_csv(DATASET_PATH, 
                            usecols = ["Mean_SINR", "Std_Dev_SINR", "UAV_Sending_Interval", "Modulation", "Bitrate", "Num_Sent", "Num_Reliable", "Num_Delay_Excd",
                                        "Num_Incr_Rcvd", "Num_Q_Overflow"],
                            dtype=df_dtypes)
dataset_details_df = get_mcs_index(dataset_details_df)

# Change sending interval categorial to numeric
dataset_details_df["UAV_Sending_Interval_Class"] = dataset_details_df["UAV_Sending_Interval"].replace({10:0, 20:1, 66.7:2, 100:3})

# Quantize mean and std dev of sinr
num_bins = 100
_, mean_sinr_bins = pd.qcut(dataset_details_df.Mean_SINR, q=num_bins, retbins=True)
mean_sinr_bins = np.concatenate(([-np.inf], mean_sinr_bins[1:-1], [np.inf]))
_, std_dev_sinr_bins = pd.qcut(dataset_details_df.Std_Dev_SINR, q=num_bins, retbins=True)
std_dev_sinr_bins = np.concatenate(([-np.inf], std_dev_sinr_bins[1:-1], [np.inf]))

dataset_details_df["Mean_SINR_Class"] = pd.cut(dataset_details_df.Mean_SINR, mean_sinr_bins, right=True, include_lowest=False, labels=False)
dataset_details_df["Std_Dev_SINR_Class"] = pd.cut(dataset_details_df.Std_Dev_SINR, std_dev_sinr_bins, right=True, include_lowest=False, labels=False)


# # Generate dataset samples
df_train = generate_reliability_dataset(dataset_details_df)

X = df_train[["Mean_SINR_Class", "Std_Dev_SINR_Class", "UAV_Sending_Interval_Class", "MCS"]].values
packet_state_train = df_train['Packet_State'].values

76it [00:00, 373.15it/s]

34848it [01:24, 410.72it/s]


## BN Model Training

In [3]:
clf = MultinomialNB(force_alpha=True)
clf.fit(X[20001:30000], packet_state_train[20001:30000])

In [57]:
X_part

array([[99., 99.,  0.,  0.],
       [99., 99.,  0.,  0.],
       [99., 99.,  0.,  0.],
       ...,
       [14.,  0.,  0.,  0.],
       [14.,  0.,  0.,  0.],
       [14.,  0.,  0.,  0.]])

In [158]:
# Partial training
from sklearn.naive_bayes import CategoricalNB, ComplementNB, BernoulliNB
from sklearn.utils import shuffle
# df_part = df_train.loc[(df_train["Mean_SINR_Class"] == 99) & (df_train["Std_Dev_SINR_Class"] == 99)]
df_part = df_train.loc[(df_train["UAV_Sending_Interval_Class"] == 0) & (df_train["MCS"] == 0)]
# df_part = df_train.loc[(df_train["Mean_SINR_Class"] == 99) & (df_train["Std_Dev_SINR_Class"] == 99) & (df_train["UAV_Sending_Interval_Class"] == 0) & (df_train["MCS"] == 0)]

df_part = shuffle(df_part)

X_part = df_part[["Mean_SINR_Class", "Std_Dev_SINR_Class", "UAV_Sending_Interval_Class", "MCS"]].values
packet_state_part = df_part['Packet_State'].values
# clf_part = MultinomialNB(fit_prior=False)
clf_part = CategoricalNB(alpha=1, force_alpha=False, fit_prior=True)
# clf_part = ComplementNB(fit_prior=True)
clf_part.fit(X_part, packet_state_part)

In [159]:
clf_part.predict_proba([[99,99,0,0]]) 

array([[5.78154915e-04, 4.96416548e-01, 5.03005219e-01, 7.81499682e-08]])

In [88]:
df_part.loc[(df_part["Mean_SINR_Class"] == 99) & (df_part["Std_Dev_SINR_Class"] == 99)]["Packet_State"].value_counts()

Packet_State
2    75629
1    24333
0       36
3        2
Name: count, dtype: int64

In [70]:
df_train.loc[(df_train["Mean_SINR_Class"] == 75) & (df_train["Std_Dev_SINR_Class"] == 86) & (df_train["UAV_Sending_Interval_Class"] == 0) & (df_train["MCS"] == 0)]["Packet_State"].value_counts()

Packet_State
2    8666
1    1332
0       1
3       1
Name: count, dtype: int64

In [9]:
clf.predict([X[20001]])

array([2])

In [5]:
X[20001]

array([99., 99.,  0.,  0.])

In [4]:
clf.predict_proba([X[20001]])

array([[1.54655018e-04, 2.56687823e-01, 7.43143569e-01, 1.39535834e-05]])

In [14]:
df_train.head(10000)["Packet_State"].value_counts()

Packet_State
2    7590
1    2408
0       2
Name: count, dtype: int64