# Import stuff

In [9]:
import pandas as pd # for data manipulation 
import numpy as np
import glob, math
import cudf 
from tqdm import tqdm
from scipy import special
from sklearn.model_selection import train_test_split

def h_dist_calc(row):
    """Return the horizontal distance for one dataframe row.

    The row must provide ``"U2G_Distance"`` and ``"Height"``; the result is
    ``sqrt(U2G_Distance**2 - Height**2)``.

    A negative radicand (e.g. the distance rounding marginally below the
    height) is clamped to zero, so the function returns 0.0 instead of
    raising ``ValueError: math domain error``. NaN inputs still yield NaN.
    """
    radicand = row["U2G_Distance"]**2 - row["Height"]**2
    return math.sqrt(max(radicand, 0.0))

def q_func(x):
    """Gaussian Q-function, ``Q(x) = 0.5 * erfc(x / sqrt(2))``.

    Mathematically identical to ``0.5 - 0.5*erf(x / sqrt(2))``, but using the
    complementary error function avoids the cancellation that makes the
    subtraction form collapse to exactly 0.0 for large positive ``x``.
    Accepts scalars or NumPy arrays (evaluated element-wise).
    """
    return 0.5 * special.erfc(x / np.sqrt(2))

def friis_calc(P,freq,dist,ple):
    '''
    Received power from the Friis equation with a configurable path loss exponent.

    P = Tx transmit power
    freq = Signal frequency
    dist = Transmission distance
    ple = Path loss exponent

    Returns P * wavelength**2 / (16*pi**2) * dist**(-ple),
    where wavelength = 299792458 / freq.
    '''
    wave_speed = 299792458
    wavelength = wave_speed / freq
    # Received power at unit distance; the distance term is applied afterwards
    unit_distance_power = P * wavelength**2 / (16*math.pi**2)
    return unit_distance_power * dist**(-ple)

def plos_calc(h_dist, height_tx, height_rx, env='suburban'):
    '''
    LoS probability model from the paper
    "Blockage Modeling for Inter-layer UAVs Communications in Urban
    Environments".

    param h_dist    : horizontal distance between Tx and Rx (m)
    param height_tx : height of Tx
    param height_rx : height of Rx
    param env       : operating environment; only 'suburban' has parameters defined
    returns         : the LoS probability p
    raises          : ValueError if env is not supported (this used to surface
                      as an UnboundLocalError on the environment constants)
    '''
    if env == 'suburban':
        a1 = 0.1
        a2 = 7.5e-4
        a3 = 8
    else:
        raise ValueError("Unsupported env {!r}: only 'suburban' is defined".format(env))

    delta_h = height_tx - height_rx
    # pow_factor = 2 * h_dist * math.sqrt(a1*a2/math.pi) + a1 # NOTE: Use this pow_factor if assuming PPP building dist.
    pow_factor = h_dist * math.sqrt(a1*a2) # NOTE: Use this pow_factor if assuming ITU-R assumptions.
    if delta_h == 0:
        # Equal heights: closed form, avoids the division by delta_h below
        p = (1 - math.exp((-(height_tx)**2) / (2*a3**2))) ** pow_factor
    else:
        # h1 is the higher endpoint and h2 the lower one
        if delta_h < 0:
            h1 = height_rx
            h2 = height_tx
        else:
            h1 = height_tx
            h2 = height_rx
        delta_h = abs(delta_h)
        p = (1 - (math.sqrt(2*math.pi)*a3 / delta_h) * abs(q_func(h1/a3) - q_func(h2/a3))) ** pow_factor
    return p

def sinr_lognormal_approx(h_dist, height, env='suburban'):
    '''
    To approximate the SNR from signal considering multipath fading and shadowing
    Assuming no interference due to CSMA, and fixed noise
    Inputs:
    h_dist = Horizontal Distance between Tx and Rx
    height = Height difference between Tx and Rx
    env = The operating environment (currently only suburban supported)
    Returns:
    (E_SNR, std_dev_SNR) = mean and standard deviation of the SNR, both on a
    linear scale (no conversion back to dB is applied)
    Raises:
    ValueError if env is not supported (this used to fail on undefined
    environment constants)
    '''
    # Validate up front so an unsupported env fails with a clear message
    if env != "suburban":
        raise ValueError("Unsupported env {!r}: only 'suburban' is defined".format(env))
    # Signal properties
    P_Tx_dBm = 20 # Transmit power (dBm)
    P_Tx = 10**(P_Tx_dBm/10) / 1000
    freq = 2.4e9 # Channel frequency (Hz)
    noise_dBm = -86
    noise = 10**(noise_dBm/10) / 1000
    # ENV Parameters Constants (suburban) -------------------------
    # Earlier parameter set, kept for reference:
    # n_min = 2
    # n_max = 2.75
    # K_dB_min = 7.8
    # K_dB_max = 17.5
    # alpha = 11.25
    # beta = 0.06
    n_min = 2
    n_max = 2.75
    K_dB_min = 1.4922
    K_dB_max = 12.2272
    K_min = 10**(K_dB_min/10)
    K_max = 10**(K_dB_max/10)
    alpha = 11.1852 # Env parameters for logarithm std dev of shadowing 
    beta = 0.06 # Env parameters for logarithm std dev of shadowing 
    # -----------------------------------------------------------
    # Calculate fading parameters
    # Pass the caller's env through instead of hard-coding 'suburban'
    PLoS = plos_calc(h_dist, 0, height, env=env)
    theta_Rx = math.atan2(height, h_dist) * 180 / math.pi # Elevation angle in degrees
    ple = (n_min - n_max) * PLoS + n_max # Path loss exponent
    sigma_phi_dB = alpha*math.exp(-beta*theta_Rx)
    # NOTE(review): sigma_phi is converted with 10**(x/10) and then combined
    # with eta = ln(10)/10 below - confirm this conversion is intended.
    sigma_phi = 10**(sigma_phi_dB/10) # Logarithmic std dev of shadowing
    K = K_min * math.exp(math.log(K_max/K_min) * PLoS**2)
    omega = 1 # Omega of NCS (Rician)
    dist = math.sqrt(h_dist**2 + height**2)
    P_Rx = friis_calc(P_Tx, freq, dist, ple)
    # Approximate L-NCS RV (which is the SNR) as lognormal
    eta = math.log(10) / 10
    mu_phi = 10*math.log10(P_Rx)
    E_phi = math.exp(eta*mu_phi + eta**2*sigma_phi**2/2) # Mean of shadowing RV
    var_phi = math.exp(2*eta*mu_phi+eta**2*sigma_phi**2)*(math.exp(eta**2*sigma_phi**2)-1) # Variance of shadowing RV
    E_chi = (special.gamma(1+1)/(1+K))*special.hyp1f1(-1,1,-K)*omega
    var_chi = (special.gamma(1+2)/(1+K)**2)*special.hyp1f1(-2,1,-K)*omega**2 - E_chi**2
    E_SNR = E_phi * E_chi / noise # Theoretical mean of SINR
    # Variance of a product of two independent RVs
    var_SNR = ((var_phi+E_phi**2)*(var_chi+E_chi**2) - E_phi**2 * E_chi**2) / noise**2
    std_dev_SNR = math.sqrt(var_SNR)
    # sigma_ln = math.sqrt(math.log(var_SNR/E_SNR**2 + 1))
    # mu_ln = math.log(E_SNR) - sigma_ln**2/2
    return E_SNR, std_dev_SNR



# Compile FANET dataset from processed CSV files

In [4]:
# Modified Date: 18/04/2023
# Modified for new traffic model
# Concatenates the per-scenario uplink / downlink CSVs of one processed dataset
# folder into a single CSV per link direction.
# num_UAVs = 8
# processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_64QAM_65Mbps_Hovering_NoVideo/{}UAVs_Exp1_processed".format(num_UAVs)
processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/QPSK_processed"
# pkt_state_cat = cudf.CategoricalDtype(categories=['Reliable', 'Delay_Exceeded', 'RETRY_LIMIT_REACHED', 'QUEUE_OVERFLOW', 'FAILED', 'INTERFACE_DOWN'])
df_dtypes = {"TxTime": np.float32, "U2G_Distance": np.float32, "Height": np.int16, "Num_Members": np.int16, "UAV_Sending_Interval": np.int16, "Bytes": np.int16, 
            "U2G_SINR": np.float32, "U2G_BER": np.float32, "Delay": np.float32, "Throughput": np.float32, "Queueing_Time": np.float32, "Packet_State": 'string', 
            "Retry_Count": np.int8, "Incorrectly_Received": np.int8, "Queue_Overflow": np.int8, "Packet_Name": 'string'}
# Columns read from every per-scenario CSV
csv_columns = ['Packet_Name','U2G_H_Dist', 'Height', "Num_Members", "UAV_Sending_Interval", "Bytes", "U2G_SINR", "U2G_BER", 
               "Delay", "Throughput", "Packet_State", "Retry_Count", "Incorrectly_Received", "Queue_Overflow", "Mean_SINR", "Std_Dev_SINR"]
# The number of rows kept from each scenario is capped at this many packets
max_rows_per_scenario = 100000


def _load_scenarios(csv_files):
    """Read each scenario CSV with cudf, drop rows lacking SINR statistics,
    cap the row count and return the results as a list of pandas DataFrames."""
    frames = []
    for scenario_csv in tqdm(csv_files):
        scenario = cudf.read_csv(scenario_csv, usecols=csv_columns, dtype=df_dtypes)
        # scenario["U2G_H_Dist"] = scenario.apply(h_dist_calc, axis=1)
        # scenario[['Mean_SINR',"Std_Dev_SINR"]]= scenario.apply(lambda row: sinr_lognormal_approx(row['U2G_H_Dist'],row['Height']),axis=1,result_type='expand')
        # Filter out rows where mean / std dev of sinr is NaN
        scenario = scenario[scenario['Mean_SINR'].notna()]
        scenario = scenario[scenario['Std_Dev_SINR'].notna()]
        if len(scenario.index) > max_rows_per_scenario:
            scenario = scenario.head(max_rows_per_scenario)
        frames.append(scenario.to_pandas())
    return frames


# Process and save uplink DF
uplink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
ul_df_list = _load_scenarios(uplink_csvs)
ul_df = pd.concat(ul_df_list, ignore_index=True)
# ul_df["Packet_State"] = ul_df["Packet_State"].astype('object')
# ul_df.to_hdf(processed_data_path + "_uplink.h5", key='{}_UAVs'.format(num_UAVs), format='table')
ul_df.to_csv(processed_data_path + "_uplink.csv")

# Process and save downlink DF
downlink_csvs = glob.glob(processed_data_path + "/*_downlink.csv")
dl_df_list = _load_scenarios(downlink_csvs)
dl_df = pd.concat(dl_df_list, ignore_index=True)
# dl_df["Packet_State"] = dl_df["Packet_State"].astype('object')
# dl_df.to_hdf(processed_data_path + "_downlink.h5", key='{}_UAVs'.format(num_UAVs), format='table')
dl_df.to_csv(processed_data_path + "_downlink.csv")


100%|██████████| 1125/1125 [00:27<00:00, 40.92it/s]


# Split Dataset into Train and "Hold Out" (No Video Case)

In [10]:
# Date: 07/06/2023
# Split the training dataset into train and hold out, useful for calibration.
# IMPORTANT: each scenario is split on its own first; the per-scenario parts are concatenated afterwards.
HOLDOUT_SPLIT = 0.2
MAX_NUM_PACKETS = 100000 # The maximum number of packets from each scenario
processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/QPSK_processed"
# pkt_state_cat = cudf.CategoricalDtype(categories=['Reliable', 'Delay_Exceeded', 'RETRY_LIMIT_REACHED', 'QUEUE_OVERFLOW', 'FAILED', 'INTERFACE_DOWN'])
df_dtypes = {"TxTime": np.float32, "U2G_Distance": np.float32, "Height": np.int16, "Num_Members": np.int16, "UAV_Sending_Interval": np.int16, "Bytes": np.int16, 
            "U2G_SINR": np.float32, "U2G_BER": np.float32, "Delay": np.float32, "Throughput": np.float32, "Queueing_Time": np.float32, "Packet_State": 'string', 
            "Retry_Count": np.int8, "Incorrectly_Received": np.int8, "Queue_Overflow": np.int8, "Packet_Name": 'string'}
# Columns read from every per-scenario CSV
csv_columns = ['Packet_Name','U2G_H_Dist', 'Height', "Num_Members", "UAV_Sending_Interval", "Bytes", "U2G_SINR", "U2G_BER", 
               "Delay", "Throughput", "Packet_State", "Retry_Count", "Mean_SINR", "Std_Dev_SINR"]


def _split_scenarios(csv_files):
    """Read, filter and cap each scenario CSV, then split it without shuffling.

    Returns two lists of pandas DataFrames: (train parts, hold-out parts)."""
    train_parts = []
    hold_out_parts = []
    for scenario_csv in tqdm(csv_files):
        scenario = cudf.read_csv(scenario_csv, usecols=csv_columns, dtype=df_dtypes)
        # scenario["U2G_H_Dist"] = scenario.apply(h_dist_calc, axis=1)
        # scenario[['Mean_SINR',"Std_Dev_SINR"]]= scenario.apply(lambda row: sinr_lognormal_approx(row['U2G_H_Dist'],row['Height']),axis=1,result_type='expand')
        # Filter out rows where mean / std dev of sinr is NaN
        scenario = scenario[scenario['Mean_SINR'].notna()]
        scenario = scenario[scenario['Std_Dev_SINR'].notna()]
        # Cap the number of rows for each scenario at MAX_NUM_PACKETS
        if len(scenario.index) > MAX_NUM_PACKETS:
            scenario = scenario.head(MAX_NUM_PACKETS)
        train_part, hold_out_part = train_test_split(scenario, test_size=HOLDOUT_SPLIT, random_state=40, shuffle=False)
        train_parts.append(train_part.to_pandas())
        hold_out_parts.append(hold_out_part.to_pandas())
    return train_parts, hold_out_parts


# Process and save uplink DF
uplink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
ul_df_train_list, ul_df_hold_out_list = _split_scenarios(uplink_csvs)
ul_df_train = pd.concat(ul_df_train_list, ignore_index=True)
ul_df_train.to_csv(processed_data_path + "_train_uplink.csv")
ul_df_hold_out = pd.concat(ul_df_hold_out_list, ignore_index=True)
ul_df_hold_out.to_csv(processed_data_path + "_holdout_uplink.csv")

# Process and save downlink DF
downlink_csvs = glob.glob(processed_data_path + "/*_downlink.csv")
dl_df_train_list, dl_df_hold_out_list = _split_scenarios(downlink_csvs)
dl_df_train = pd.concat(dl_df_train_list, ignore_index=True)
dl_df_train.to_csv(processed_data_path + "_train_downlink.csv")
dl_df_hold_out = pd.concat(dl_df_hold_out_list, ignore_index=True)
dl_df_hold_out.to_csv(processed_data_path + "_holdout_downlink.csv")


100%|██████████| 2745/2745 [03:03<00:00, 14.94it/s]
100%|██████████| 2745/2745 [11:06<00:00,  4.12it/s]


# Split Dataset into Train and "Hold Out" (Video Case)

In [2]:
# Date: 07/06/2023
# Split the training dataset into train and hold out, useful for calibration.
# IMPORTANT: each scenario is split on its own first; the per-scenario parts are concatenated afterwards.
HOLDOUT_SPLIT = 0.2
processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_Video/BPSK_processed"
# pkt_state_cat = cudf.CategoricalDtype(categories=['Reliable', 'Delay_Exceeded', 'RETRY_LIMIT_REACHED', 'QUEUE_OVERFLOW', 'FAILED', 'INTERFACE_DOWN'])
df_dtypes = {"TxTime": np.float32, "U2G_Distance": np.float32, "Height": np.int16, "Num_Members": np.int16, "UAV_Sending_Interval": np.int16, "Bytes": np.int16, 
            "U2G_SINR": np.float32, "U2G_BER": np.float32, "Delay": np.float32, "Throughput": np.float32, "Queueing_Time": np.float32, "Packet_State": 'string', 
            "Retry_Count": np.int8, "Incorrectly_Received": np.int8, "Queue_Overflow": np.int8, "Packet_Name": 'string'}
# Columns read from every per-scenario CSV
csv_columns = ['Packet_Name','U2G_H_Dist', 'Height', "Num_Members", "UAV_Sending_Interval", "Bytes", "U2G_SINR", "U2G_BER", 
               "Delay", "Throughput", "Packet_State", "Retry_Count", "Mean_SINR", "Std_Dev_SINR"]


def _split_scenarios(csv_files):
    """Read, filter and cap each scenario CSV, then split it without shuffling.

    Returns two lists of pandas DataFrames: (train parts, hold-out parts)."""
    train_parts = []
    hold_out_parts = []
    for scenario_csv in tqdm(csv_files):
        scenario = cudf.read_csv(scenario_csv, usecols=csv_columns, dtype=df_dtypes)
        # scenario["U2G_H_Dist"] = scenario.apply(h_dist_calc, axis=1)
        # scenario[['Mean_SINR',"Std_Dev_SINR"]]= scenario.apply(lambda row: sinr_lognormal_approx(row['U2G_H_Dist'],row['Height']),axis=1,result_type='expand')
        # Filter out rows where mean / std dev of sinr is NaN
        scenario = scenario[scenario['Mean_SINR'].notna()]
        scenario = scenario[scenario['Std_Dev_SINR'].notna()]
        # Cap the number of rows for each scenario at 100,000 packets
        if len(scenario.index) > 100000:
            scenario = scenario.head(100000)
        train_part, hold_out_part = train_test_split(scenario, test_size=HOLDOUT_SPLIT, random_state=40, shuffle=False)
        train_parts.append(train_part.to_pandas())
        hold_out_parts.append(hold_out_part.to_pandas())
    return train_parts, hold_out_parts


# Process and save uplink DF
uplink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
ul_df_train_list, ul_df_hold_out_list = _split_scenarios(uplink_csvs)
ul_df_train = pd.concat(ul_df_train_list, ignore_index=True)
ul_df_train.to_csv(processed_data_path + "_train_uplink.csv")
ul_df_hold_out = pd.concat(ul_df_hold_out_list, ignore_index=True)
ul_df_hold_out.to_csv(processed_data_path + "_holdout_uplink.csv")

# Process and save downlink DF
downlink_csvs = glob.glob(processed_data_path + "/*_downlink.csv")
dl_df_train_list, dl_df_hold_out_list = _split_scenarios(downlink_csvs)
dl_df_train = pd.concat(dl_df_train_list, ignore_index=True)
dl_df_train.to_csv(processed_data_path + "_train_downlink.csv")
dl_df_hold_out = pd.concat(dl_df_hold_out_list, ignore_index=True)
dl_df_hold_out.to_csv(processed_data_path + "_holdout_downlink.csv")

# Process and save video DF
video_csvs = glob.glob(processed_data_path + "/*_video.csv")
video_df_train_list, video_df_hold_out_list = _split_scenarios(video_csvs)
video_df_train = pd.concat(video_df_train_list, ignore_index=True)
video_df_train.to_csv(processed_data_path + "_train_video.csv")
video_df_hold_out = pd.concat(video_df_hold_out_list, ignore_index=True)
video_df_hold_out.to_csv(processed_data_path + "_holdout_video.csv")

100%|██████████| 2745/2745 [08:26<00:00,  5.42it/s]


# Compile Reliability Results for Each Taguchi Hovering Test Case

### Downlink

In [4]:
import pandas as pd
import numpy as np
import glob
import math
from tqdm import tqdm

delay_threshold = 1
processed_data_path = "/media/research-student/One Touch/FANET Datasets/BPSK_Range_Test_processed" 
save_path = "/media/research-student/One Touch/FANET Datasets/"
save_filename = "BPSK_Range_Test_downlink"
stat_columns = ['U2G_H_Dist', 'Height', "Num_Members", "UAV_Sending_Interval","Bytes", "Delay", "Packet_State", "Incorrectly_Received", "Queue_Overflow"]

# Summarise every downlink test-case CSV into one row of reliability statistics
downlink_csvs = glob.glob(processed_data_path + "/*_downlink.csv")
dl_df_list = []
for csv_file in tqdm(downlink_csvs):
    df = pd.read_csv(csv_file, usecols=stat_columns)
    # Scenario settings are taken from the first row of the file
    height = df["Height"].values[0]
    num_members = df["Num_Members"].values[0]
    sending_interval = df["UAV_Sending_Interval"].values[0]
    u2g_h_dist = df["U2G_H_Dist"].mean()
    num_packets = len(df)
    # Fraction of packets per outcome
    reliability = int((df["Packet_State"] == "Reliable").sum()) / num_packets
    delay_excd_prob = int((df["Delay"] > delay_threshold).sum()) / num_packets
    queue_overflow_prob = int((df["Queue_Overflow"] > 0).sum()) / num_packets
    # Fraction of packets for each "Incorrectly_Received" value 0..7 (0.0 when absent)
    incr_rcvd_counts = df['Incorrectly_Received'].value_counts()
    incr_rcvd_probs = [incr_rcvd_counts[i]/num_packets if i in incr_rcvd_counts else 0.0
                       for i in range(8)]
    test_case = {"Horizontal_Distance": u2g_h_dist, "Height": height, "Num_Members": num_members, "Sending_Interval": sending_interval,
                 "Reliability": reliability, "Delay_Excd_Prob": delay_excd_prob, "Queue_Overflow_Prob": queue_overflow_prob}
    for i, prob in enumerate(incr_rcvd_probs):
        test_case["{}_Incr_Rcvd".format(i)] = prob
    dl_df_list.append(test_case)
dl_df = pd.DataFrame(dl_df_list)
# dl_df.to_hdf(save_path + "{}.h5".format(save_filename), key='Downlink')
dl_df.to_csv(save_path + "{}.csv".format(save_filename))

100%|██████████| 400/400 [00:19<00:00, 20.69it/s]


### Uplink

In [2]:
import pandas as pd
import numpy as np
import glob, math
from tqdm import tqdm

delay_threshold = 1
processed_data_path = "/media/research-student/One Touch/FANET Datasets/64QAM_Range_Test_processed" 
save_path = "/media/research-student/One Touch/FANET Datasets/"
save_filename = "64QAM_Range_Test_uplink"
stat_columns = ['U2G_Distance', 'Height', "Num_Members", "UAV_Sending_Interval","Bytes", "Delay", "Packet_State", "Incorrectly_Received", "Queue_Overflow"]

# Summarise every uplink test-case CSV into one row of reliability statistics
uplink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
ul_df_list = []
for csv_file in tqdm(uplink_csvs):
    df = pd.read_csv(csv_file, usecols=stat_columns)
    u2g_dist = df["U2G_Distance"].mean()
    # Scenario settings are taken from the first row of the file
    height = df["Height"].values[0]
    num_members = df["Num_Members"].values[0]
    sending_interval = df["UAV_Sending_Interval"].values[0]
    # Horizontal distance from the mean U2G distance and the height. The
    # radicand is clamped at zero so rounding (distance marginally below the
    # height) cannot raise "math domain error".
    u2g_h_dist = math.sqrt(max(u2g_dist**2 - height**2, 0.0))
    num_packets = len(df)
    # Fraction of packets per outcome
    num_reliable = len(df.loc[df["Packet_State"] == "Reliable"])
    reliability = num_reliable / num_packets
    num_delay_excd = len(df.loc[df["Delay"] > delay_threshold])
    delay_excd_prob = num_delay_excd / num_packets
    num_queue_overflow = len(df.loc[df["Queue_Overflow"] > 0])
    queue_overflow_prob = num_queue_overflow / num_packets
    # Fraction of packets for each "Incorrectly_Received" value 0..7 (0.0 when absent)
    incr_rcvd_counts = df['Incorrectly_Received'].value_counts()
    incr_rcvd_probs = [incr_rcvd_counts[i]/num_packets if i in incr_rcvd_counts else 0.0
                       for i in range(8)]
    test_case = {"Horizontal_Distance": u2g_h_dist, "Height": height, "Num_Members": num_members, "Sending_Interval": sending_interval,
                 "Reliability": reliability, "Delay_Excd_Prob": delay_excd_prob, "Queue_Overflow_Prob": queue_overflow_prob}
    for i, prob in enumerate(incr_rcvd_probs):
        test_case["{}_Incr_Rcvd".format(i)] = prob
    ul_df_list.append(test_case)
ul_df = pd.DataFrame(ul_df_list)
# ul_df.to_hdf(save_path + "{}.h5".format(save_filename), key='Uplink')
ul_df.to_csv(save_path + "{}.csv".format(save_filename))

100%|██████████| 300/300 [00:09<00:00, 32.03it/s]


In [3]:
# Writes the current dl_df to a separate CSV. Neither dl_df nor save_path is
# defined in this cell, so both must already exist from a previously run cell.
partial_downlink_csv = save_path + "Hovering_Train_partial_Dataset_NP10000_64QAM_65Mbps_downlink.csv"
dl_df.to_csv(partial_downlink_csv)

# Compile Reliability Results for Each Taguchi Test Case, v2 (with Different Modulations)

### Uplink (Multiple Folders for Different Modulations)

In [3]:
import pandas as pd
import numpy as np
import glob, math
from tqdm import tqdm

delay_threshold = 1
processed_data_paths = ["/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/BPSK_Test",
                        "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/QPSK_Test",
                        "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/QAM16_Test",
                        "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/QAM64_Test"]
save_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo"
save_filename = "Multi_Modulation_Test_Cases_Uplink"
# Modulation names, checked against the folder path in this order
known_modulations = ("BPSK", "QPSK", "QAM16", "QAM64")
# Packet states kept for the statistics; anything else (unknown FAIL) is dropped
known_states = ["Reliable", "Delay_Exceeded", "RETRY_LIMIT_REACHED", "QUEUE_OVERFLOW"]
ul_df_list = []
# Process and save uplink DF 
for processed_data_path in processed_data_paths:
    # Get modulation from the folder path. Fail loudly when nothing matches
    # instead of silently reusing the modulation of the previous folder.
    modulation = next((name for name in known_modulations if name in processed_data_path), None)
    if modulation is None:
        raise ValueError("Cannot determine modulation from path: {}".format(processed_data_path))

    uplink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
    for csv_file in tqdm(uplink_csvs):
        df = pd.read_csv(csv_file, 
                        usecols = ['U2G_H_Dist', 'Height', "UAV_Sending_Interval","Delay", "Packet_State", "Throughput"])
        df = df.loc[df["Packet_State"].isin(known_states)] # Filter out unknown FAIL
        # Scenario settings are taken from the first remaining row
        u2g_h_dist = df["U2G_H_Dist"].values[0]
        height = df["Height"].values[0]
        uav_sending_interval = df["UAV_Sending_Interval"].values[0]
        throughput = df["Throughput"].mean()
        # Fraction of the remaining packets in each state
        num_packets = len(df)
        packet_state = df["Packet_State"]
        reliability = len(df.loc[packet_state == "Reliable"]) / num_packets
        incr_rcvd_prob = len(df.loc[packet_state == "RETRY_LIMIT_REACHED"]) / num_packets
        queue_overflow_prob = len(df.loc[packet_state == "QUEUE_OVERFLOW"]) / num_packets
        delay_excd_prob = len(df.loc[packet_state == "Delay_Exceeded"]) / num_packets
        test_case = {"Horizontal_Distance": u2g_h_dist, "Height": height, "Modulation": modulation, "UAV_Sending_Interval": uav_sending_interval, "Throughput": throughput,
                    "Reliability": reliability, "Delay_Excd_Prob": delay_excd_prob, "Queue_Overflow_Prob": queue_overflow_prob, "Incorrectly_Rcvd_Prob": incr_rcvd_prob}
        ul_df_list.append(test_case)
ul_df = pd.DataFrame(ul_df_list)
# ul_df.to_hdf(save_path + "{}.h5".format(save_filename), key='Uplink')
ul_df.to_csv(save_path + "/{}.csv".format(save_filename))

100%|██████████| 100/100 [00:02<00:00, 43.22it/s]
100%|██████████| 100/100 [00:02<00:00, 43.41it/s]
100%|██████████| 100/100 [00:02<00:00, 43.77it/s]
100%|██████████| 100/100 [00:02<00:00, 45.19it/s]


### Uplink (One folder for all Modulations)

In [1]:
import pandas as pd
import numpy as np
import glob, math
from tqdm import tqdm
import cudf 

cudf.set_allocator("managed")

processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/Test/Test_Dataset_1_100000_processed"
save_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/Test"
save_filename = "Multi_Modulation_Test_Cases_1_100000_Uplink"
# (token searched for in the CSV path, modulation label), checked in this order
modulation_tokens = (("BPSK", "BPSK"), ("QPSK", "QPSK"), ("QAM-16", "QAM16"), ("QAM-64", "QAM64"))
# Packet states kept for the statistics; anything else (unknown FAIL) is dropped
known_states = ["Reliable", "Delay_Exceeded", "RETRY_LIMIT_REACHED", "QUEUE_OVERFLOW"]
ul_df_list = []
# Process and save uplink DF 
uplink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
for csv_file in tqdm(uplink_csvs):
    # Get modulation from the CSV path. Fail loudly when nothing matches
    # instead of silently reusing the modulation of the previous file.
    modulation = next((label for token, label in modulation_tokens if token in csv_file), None)
    if modulation is None:
        raise ValueError("Cannot determine modulation from file name: {}".format(csv_file))
    df = cudf.read_csv(csv_file, 
                    usecols = ['U2G_H_Dist', 'Height', "UAV_Sending_Interval","Delay", "Packet_State", "Throughput"])
    df = df.loc[df["Packet_State"].isin(known_states)] # Filter out unknown FAIL
    # Scenario settings are taken from the first remaining row
    u2g_h_dist = df["U2G_H_Dist"].values[0]
    height = df["Height"].values[0]
    uav_sending_interval = df["UAV_Sending_Interval"].values[0]
    throughput = df["Throughput"].mean()
    # Fraction of the remaining packets in each state
    num_packets = len(df)
    num_reliable = len(df.loc[df["Packet_State"] == "Reliable"])
    num_incr_rcvd = len(df.loc[df["Packet_State"] == "RETRY_LIMIT_REACHED"])
    num_queue_overflow = len(df.loc[df["Packet_State"] == "QUEUE_OVERFLOW"])
    num_delay_excd = len(df.loc[df["Packet_State"] == "Delay_Exceeded"])
    reliability = num_reliable / num_packets
    incr_rcvd_prob = num_incr_rcvd / num_packets
    queue_overflow_prob = num_queue_overflow / num_packets
    delay_excd_prob = num_delay_excd / num_packets
    test_case = {"Horizontal_Distance": u2g_h_dist, "Height": height, "Modulation": modulation, "UAV_Sending_Interval": uav_sending_interval, "Throughput": throughput,
                "Reliability": reliability, "Delay_Excd_Prob": delay_excd_prob, "Queue_Overflow_Prob": queue_overflow_prob, "Incorrectly_Rcvd_Prob": incr_rcvd_prob}
    ul_df_list.append(test_case)
ul_df = pd.DataFrame(ul_df_list)
ul_df.to_csv(save_path + "/{}.csv".format(save_filename))

100%|██████████| 53/53 [00:21<00:00,  2.51it/s]


### Downlink (One folder for all Modulations)

In [23]:
import pandas as pd
import numpy as np
import glob, math
from tqdm import tqdm
import cudf 

cudf.set_allocator("managed")

processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/Test/Test_Dataset_1_100000_processed"
save_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/Test"
save_filename = "Multi_Modulation_Test_Cases_1_100000_Downlink"
# (token searched for in the CSV path, modulation label), checked in this order
modulation_tokens = (("BPSK", "BPSK"), ("QPSK", "QPSK"), ("QAM-16", "QAM16"), ("QAM-64", "QAM64"))
# Packet states kept for the statistics; anything else (unknown FAIL) is dropped
known_states = ["Reliable", "Delay_Exceeded", "RETRY_LIMIT_REACHED", "QUEUE_OVERFLOW"]
dl_df_list = []
# Process and save downlink DF 
downlink_csvs = glob.glob(processed_data_path + "/*_downlink.csv")
for csv_file in tqdm(downlink_csvs):
    # Get modulation from the CSV path. Fail loudly when nothing matches
    # instead of silently reusing the modulation of the previous file.
    modulation = next((label for token, label in modulation_tokens if token in csv_file), None)
    if modulation is None:
        raise ValueError("Cannot determine modulation from file name: {}".format(csv_file))
    df = cudf.read_csv(csv_file, 
                    usecols = ['U2G_H_Dist', 'Height', "UAV_Sending_Interval","Delay", "Packet_State", "Throughput"])
    df = df.loc[df["Packet_State"].isin(known_states)] # Filter out unknown FAIL
    # Scenario settings are taken from the first remaining row
    u2g_h_dist = df["U2G_H_Dist"].values[0]
    height = df["Height"].values[0]
    uav_sending_interval = df["UAV_Sending_Interval"].values[0]
    throughput = df["Throughput"].mean()
    # Fraction of the remaining packets in each state
    num_packets = len(df)
    num_reliable = len(df.loc[df["Packet_State"] == "Reliable"])
    num_incr_rcvd = len(df.loc[df["Packet_State"] == "RETRY_LIMIT_REACHED"])
    num_queue_overflow = len(df.loc[df["Packet_State"] == "QUEUE_OVERFLOW"])
    num_delay_excd = len(df.loc[df["Packet_State"] == "Delay_Exceeded"])
    reliability = num_reliable / num_packets
    incr_rcvd_prob = num_incr_rcvd / num_packets
    queue_overflow_prob = num_queue_overflow / num_packets
    delay_excd_prob = num_delay_excd / num_packets
    test_case = {"Horizontal_Distance": u2g_h_dist, "Height": height, "Modulation": modulation, "UAV_Sending_Interval": uav_sending_interval, "Throughput": throughput,
                "Reliability": reliability, "Delay_Excd_Prob": delay_excd_prob, "Queue_Overflow_Prob": queue_overflow_prob, "Incorrectly_Rcvd_Prob": incr_rcvd_prob}
    dl_df_list.append(test_case)
dl_df = pd.DataFrame(dl_df_list)
# dl_df.to_hdf(save_path + "{}.h5".format(save_filename), key='Downlink')
dl_df.to_csv(save_path + "/{}.csv".format(save_filename))

  0%|          | 0/55 [00:00<?, ?it/s]

100%|██████████| 55/55 [01:01<00:00,  1.13s/it]


### Video (One folder for all Modulations)

In [4]:
import pandas as pd
import numpy as np
import glob, math
from tqdm import tqdm

# NOTE: delay_threshold is not referenced anywhere below in this cell; the
# delay outcome is read from the pre-computed "Packet_State" column instead.
delay_threshold = 1
processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_Video/Test/Test_Dataset_1_processed"
save_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_Video/Test"
save_filename = "Multi_Modulation_Test_Cases_1_Video"

# Filename tokens mapped to the modulation label stored in the output table.
# They are tested in this order, so the first token found in the path wins.
modulation_tokens = (("BPSK", "BPSK"), ("QPSK", "QPSK"), ("QAM-16", "QAM16"), ("QAM-64", "QAM64"))
# Packet states that enter the statistics; rows with any other state
# (unknown FAIL) are dropped before counting.
known_states = ["Reliable", "Delay_Exceeded", "RETRY_LIMIT_REACHED", "QUEUE_OVERFLOW"]

vid_df_list = []
# Process and save video DF
video_csvs = glob.glob(processed_data_path + "/*_video.csv")
for csv_file in tqdm(video_csvs):
    # Get modulation. Fail loudly on an unrecognised name: without this check
    # the label of the previously processed file would be reused silently.
    modulation = next((label for token, label in modulation_tokens if token in csv_file), None)
    if modulation is None:
        raise ValueError("Cannot determine modulation from file name: {}".format(csv_file))

    df = pd.read_csv(csv_file,
                     usecols=['U2G_H_Dist', 'Height', "UAV_Sending_Interval", "Delay", "Packet_State", "Throughput"])
    df = df.loc[df["Packet_State"].isin(known_states)]

    num_packets = len(df)
    if num_packets == 0:
        # The scenario parameters are read from the first row and every
        # probability divides by the packet count, so an empty frame is unusable.
        raise ValueError("No packets with a known Packet_State in: {}".format(csv_file))

    # Number of packets observed in each known state.
    state_counts = {state: len(df.loc[df["Packet_State"] == state]) for state in known_states}

    # Scenario parameters are taken from the first remaining row of the file.
    test_case = {
        "Horizontal_Distance": df["U2G_H_Dist"].values[0],
        "Height": df["Height"].values[0],
        "Modulation": modulation,
        "UAV_Sending_Interval": df["UAV_Sending_Interval"].values[0],
        "Throughput": df["Throughput"].mean(),
        "Reliability": state_counts["Reliable"] / num_packets,
        "Delay_Excd_Prob": state_counts["Delay_Exceeded"] / num_packets,
        "Queue_Overflow_Prob": state_counts["QUEUE_OVERFLOW"] / num_packets,
        "Incorrectly_Rcvd_Prob": state_counts["RETRY_LIMIT_REACHED"] / num_packets,
    }
    vid_df_list.append(test_case)

# One row per CSV file; written with the default integer index column.
vid_df = pd.DataFrame(vid_df_list)
vid_df.to_csv(save_path + "/{}.csv".format(save_filename))

100%|██████████| 960/960 [05:46<00:00,  2.77it/s]


### Uplink (Case Studies)

In [5]:
import pandas as pd
import numpy as np
import glob, math
from tqdm import tqdm

# NOTE: delay_threshold is not referenced anywhere below in this cell; the
# delay outcome is read from the pre-computed "Packet_State" column instead.
delay_threshold = 1
processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/Case_Studies_processed"
save_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo"
save_filename = "Case_Studies_Uplink"

# Filename tokens mapped to the modulation label stored in the output table.
# They are tested in this order, so the first token found in the path wins.
modulation_tokens = (("BPSK", "BPSK"), ("QPSK", "QPSK"), ("QAM-16", "QAM16"), ("QAM-64", "QAM64"))
# Packet states that enter the statistics; rows with any other state
# (unknown FAIL) are dropped before counting.
known_states = ["Reliable", "Delay_Exceeded", "RETRY_LIMIT_REACHED", "QUEUE_OVERFLOW"]

ul_df_list = []
# Process and save uplink DF
uplink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
for csv_file in tqdm(uplink_csvs):
    # Get modulation. Fail loudly on an unrecognised name: without this check
    # the label of the previously processed file would be reused silently.
    modulation = next((label for token, label in modulation_tokens if token in csv_file), None)
    if modulation is None:
        raise ValueError("Cannot determine modulation from file name: {}".format(csv_file))

    df = pd.read_csv(csv_file,
                     usecols=['U2G_H_Dist', 'Height', "UAV_Sending_Interval", "Delay", "Packet_State", "Throughput"])
    df = df.loc[df["Packet_State"].isin(known_states)]

    num_packets = len(df)
    if num_packets == 0:
        # The scenario parameters are read from the first row and every
        # probability divides by the packet count, so an empty frame is unusable.
        raise ValueError("No packets with a known Packet_State in: {}".format(csv_file))

    # Number of packets observed in each known state.
    state_counts = {state: len(df.loc[df["Packet_State"] == state]) for state in known_states}

    # Scenario parameters are taken from the first remaining row of the file.
    test_case = {
        "Horizontal_Distance": df["U2G_H_Dist"].values[0],
        "Height": df["Height"].values[0],
        "Modulation": modulation,
        "UAV_Sending_Interval": df["UAV_Sending_Interval"].values[0],
        "Throughput": df["Throughput"].mean(),
        "Reliability": state_counts["Reliable"] / num_packets,
        "Delay_Excd_Prob": state_counts["Delay_Exceeded"] / num_packets,
        "Queue_Overflow_Prob": state_counts["QUEUE_OVERFLOW"] / num_packets,
        "Incorrectly_Rcvd_Prob": state_counts["RETRY_LIMIT_REACHED"] / num_packets,
    }
    ul_df_list.append(test_case)

# One row per CSV file; written with the default integer index column.
ul_df = pd.DataFrame(ul_df_list)
ul_df.to_csv(save_path + "/{}.csv".format(save_filename))

100%|██████████| 488/488 [00:28<00:00, 17.21it/s]


# Compile Throughput Dataset from Processed CSV Files

## No Video

In [2]:
# Date: 03/07/2023
# Compile the throughput training dataset, using only unique rows.
# IMPORTANT: duplicates are removed within each scenario file first, and only
# then are the per-file frames concatenated.
processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/BPSK_processed"
df_dtypes = {"TxTime": np.float32, "U2G_Distance": np.float32, "Height": np.int16, "Num_Members": np.int16, "UAV_Sending_Interval": np.int16, "Bytes": np.int16,
            "U2G_SINR": np.float32, "U2G_BER": np.float32, "Delay": np.float32, "Throughput": np.float32, "Queueing_Time": np.float32, "Packet_State": 'string',
            "Retry_Count": np.int8, "Incorrectly_Received": np.int8, "Queue_Overflow": np.int8, "Packet_Name": 'string'}
# Columns read from every processed CSV; also the key used for de-duplication.
throughput_columns = ["UAV_Sending_Interval", "Mean_SINR", "Std_Dev_SINR", "Throughput"]


def _load_unique_throughput_rows(csv_files, columns, dtypes):
    """Read the throughput columns of each CSV and stack the unique rows.

    Args:
        csv_files: Paths of the processed CSV files to read.
        columns: Column names to load; rows are de-duplicated on all of them.
        dtypes: Column-name -> dtype mapping forwarded to ``cudf.read_csv``.

    Returns:
        A pandas DataFrame holding the per-file unique rows of all files,
        concatenated with a fresh integer index.
    """
    frames = []
    for csv_file in tqdm(csv_files):
        df = cudf.read_csv(csv_file,
                           usecols=columns,
                           dtype=dtypes)
        # Filter out rows where mean / std dev of sinr is NaN
        df = df[df['Mean_SINR'].notna()]
        df = df[df['Std_Dev_SINR'].notna()]
        # Remove duplicated throughput data rows within this file
        df.drop_duplicates(subset=columns, inplace=True, ignore_index=True)
        # Move the reduced frame to host memory before accumulating it.
        frames.append(df.to_pandas())
    return pd.concat(frames, ignore_index=True)


# Process and save uplink DF
uplink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
ul_df_train = _load_unique_throughput_rows(uplink_csvs, throughput_columns, df_dtypes)
ul_df_train.to_csv(processed_data_path + "_throughput_uplink.csv")

# Process and save downlink DF
downlink_csvs = glob.glob(processed_data_path + "/*_downlink.csv")
dl_df_train = _load_unique_throughput_rows(downlink_csvs, throughput_columns, df_dtypes)
dl_df_train.to_csv(processed_data_path + "_throughput_downlink.csv")


100%|██████████| 2745/2745 [01:27<00:00, 31.51it/s]
100%|██████████| 2745/2745 [06:52<00:00,  6.66it/s]


## With Video (From processed CSV files)

In [None]:
# Date: 03/07/2023
# Compile the throughput training dataset, using only unique rows.
# IMPORTANT: duplicates are removed within each scenario file first, and only
# then are the per-file frames concatenated.
processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_Video/QAM64_processed"
df_dtypes = {"TxTime": np.float32, "U2G_Distance": np.float32, "Height": np.int16, "Num_Members": np.int16, "UAV_Sending_Interval": np.int16, "Bytes": np.int16,
            "U2G_SINR": np.float32, "U2G_BER": np.float32, "Delay": np.float32, "Throughput": np.float32, "Queueing_Time": np.float32, "Packet_State": 'string',
            "Retry_Count": np.int8, "Incorrectly_Received": np.int8, "Queue_Overflow": np.int8, "Packet_Name": 'string'}
# Columns read from every processed CSV; also the key used for de-duplication.
throughput_columns = ["UAV_Sending_Interval", "Mean_SINR", "Std_Dev_SINR", "Throughput"]


def _load_unique_throughput_rows(csv_files, columns, dtypes):
    """Read the throughput columns of each CSV and stack the unique rows.

    Args:
        csv_files: Paths of the processed CSV files to read.
        columns: Column names to load; rows are de-duplicated on all of them.
        dtypes: Column-name -> dtype mapping forwarded to ``cudf.read_csv``.

    Returns:
        A pandas DataFrame holding the per-file unique rows of all files,
        concatenated with a fresh integer index.
    """
    frames = []
    for csv_file in tqdm(csv_files):
        df = cudf.read_csv(csv_file,
                           usecols=columns,
                           dtype=dtypes)
        # Filter out rows where mean / std dev of sinr is NaN
        df = df[df['Mean_SINR'].notna()]
        df = df[df['Std_Dev_SINR'].notna()]
        # Remove duplicated throughput data rows within this file
        df.drop_duplicates(subset=columns, inplace=True, ignore_index=True)
        # Move the reduced frame to host memory before accumulating it.
        frames.append(df.to_pandas())
    return pd.concat(frames, ignore_index=True)


# Process and save uplink DF
uplink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
ul_df_train = _load_unique_throughput_rows(uplink_csvs, throughput_columns, df_dtypes)
ul_df_train.to_csv(processed_data_path + "_throughput_uplink.csv")

# Process and save downlink DF
downlink_csvs = glob.glob(processed_data_path + "/*_downlink.csv")
dl_df_train = _load_unique_throughput_rows(downlink_csvs, throughput_columns, df_dtypes)
dl_df_train.to_csv(processed_data_path + "_throughput_downlink.csv")

# Process and save video DF
video_csvs = glob.glob(processed_data_path + "/*_video.csv")
vid_df_train = _load_unique_throughput_rows(video_csvs, throughput_columns, df_dtypes)
vid_df_train.to_csv(processed_data_path + "_throughput_video.csv")


# Examine individual case

In [12]:
import pandas as pd
import numpy as np
import glob, math
from tqdm import tqdm

# Summarise a single processed downlink CSV into one `test_case` dictionary.
delay_threshold = 0.04  # packets with Delay above this value count as "delay exceeded"
csv_file = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_64QAM_65Mbps_Hovering/8UAVs_processed/NumMember-7_InterUAVDistance-5_Height-24_Distance-625_PacketSize-920_SendingRate-808_downlink.csv"
df = pd.read_csv(csv_file,
                 usecols=['U2G_Distance', 'Height', "Num_Members", "Mean_Sending_Interval", "Bytes", "Delay", "Packet_State", "Incorrectly_Received", "Queue_Overflow"])
# Back-fill missing distances. The result is assigned back to the column:
# calling fillna(..., inplace=True) on the `df[...]` selection acts on an
# intermediate object and is not guaranteed to modify `df` itself.
df["U2G_Distance"] = df["U2G_Distance"].bfill()

# Scenario parameters are taken from the first row of the file.
u2g_dist = df["U2G_Distance"].values[0]
height = df["Height"].values[0]
num_members = df["Num_Members"].values[0]
sending_interval = df["Mean_Sending_Interval"].values[0]
packet_size = df["Bytes"].values[0]
# Horizontal distance from U2G_Distance and Height via Pythagoras.
u2g_h_dist = math.sqrt(u2g_dist**2 - height**2)

num_packets = len(df)
num_reliable = len(df.loc[df["Packet_State"] == "Reliable"])
reliability = num_reliable / num_packets

# Fraction of packets for each "Incorrectly_Received" value 0..7; values that
# never occur in the file get probability 0.
incr_rcvd_counts = df['Incorrectly_Received'].value_counts()
incr_rcvd_probs = [incr_rcvd_counts.get(i, 0) / num_packets for i in range(8)]

num_delay_excd = len(df.loc[df["Delay"] > delay_threshold])
delay_excd_prob = num_delay_excd / num_packets
num_queue_overflow = len(df.loc[df["Queue_Overflow"] > 0])
queue_overflow_prob = num_queue_overflow / num_packets

test_case = {"Horizontal_Distance": u2g_h_dist, "Height": height, "Num_Members": num_members, "Sending_Interval": sending_interval, "Packet_Size": packet_size,
             "Reliability": reliability, "Delay_Excd_Prob": delay_excd_prob, "Queue_Overflow_Prob": queue_overflow_prob}
# Append the keys "0_Incr_Rcvd" ... "7_Incr_Rcvd" in ascending order.
test_case.update({"{}_Incr_Rcvd".format(i): prob for i, prob in enumerate(incr_rcvd_probs)})

In [13]:
# Show the `test_case` summary dictionary as the cell output.
test_case

{'Horizontal_Distance': 125.01715882229927,
 'Height': 24,
 'Num_Members': 7,
 'Sending_Interval': 904,
 'Packet_Size': 972,
 'Reliability': 0.27374525094981006,
 'Delay_Excd_Prob': 0.6038792241551689,
 'Queue_Overflow_Prob': 0.0,
 '0_Incr_Rcvd': 0.11137772445510898,
 '1_Incr_Rcvd': 0.10667866426714658,
 '2_Incr_Rcvd': 0.09368126374725055,
 '3_Incr_Rcvd': 0.07728454309138172,
 '4_Incr_Rcvd': 0.06898620275944811,
 '5_Incr_Rcvd': 0.0613877224555089,
 '6_Incr_Rcvd': 0.07368526294741051,
 '7_Incr_Rcvd': 0.40691861627674464}

# Compare No. of Packets for Probabilities

In [20]:
import pandas as pd
import numpy as np
import glob, math
from tqdm import tqdm
import cudf 

cudf.set_allocator("managed")
# Upper bound on the number of (known-state) packets used per file.
NUM_PACKETS = 100000
processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/Test/Test_Dataset_1_processed"
save_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/Test"
save_filename = "Test_Dataset_1_uplink_{}_packets".format(NUM_PACKETS)

# Filename tokens mapped to the modulation label stored in the output table.
# They are tested in this order, so the first token found in the path wins.
modulation_tokens = (("BPSK", "BPSK"), ("QPSK", "QPSK"), ("QAM-16", "QAM16"), ("QAM-64", "QAM64"))
# Packet states that enter the statistics; rows with any other state
# (unknown FAIL) are dropped before counting.
known_states = ["Reliable", "Delay_Exceeded", "RETRY_LIMIT_REACHED", "QUEUE_OVERFLOW"]

# NOTE: despite the `dl_` / `downlink_` names, this cell globs and summarises
# the *uplink* CSVs; the names are kept unchanged so notebook state is the same.
dl_df_list = []
# Process and save uplink DF
downlink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
for csv_file in tqdm(downlink_csvs):
    # Get modulation. Fail loudly on an unrecognised name: without this check
    # the label of the previously processed file would be reused silently.
    modulation = next((label for token, label in modulation_tokens if token in csv_file), None)
    if modulation is None:
        raise ValueError("Cannot determine modulation from file name: {}".format(csv_file))

    df = cudf.read_csv(csv_file,
                       usecols=['U2G_H_Dist', 'Height', "UAV_Sending_Interval", "Delay", "Packet_State", "Throughput"])
    df = df.loc[df["Packet_State"].isin(known_states)]
    # Keep at most the first NUM_PACKETS rows; head() returns the whole frame
    # when it holds fewer rows, so no length check is needed.
    df = df.head(NUM_PACKETS)

    num_packets = len(df)
    if num_packets == 0:
        # The scenario parameters are read from the first row and every
        # probability divides by the packet count, so an empty frame is unusable.
        raise ValueError("No packets with a known Packet_State in: {}".format(csv_file))

    # Number of packets observed in each known state.
    state_counts = {state: len(df.loc[df["Packet_State"] == state]) for state in known_states}

    # Scenario parameters are taken from the first remaining row of the file.
    test_case = {
        "Horizontal_Distance": df["U2G_H_Dist"].values[0],
        "Height": df["Height"].values[0],
        "Modulation": modulation,
        "UAV_Sending_Interval": df["UAV_Sending_Interval"].values[0],
        "Throughput": df["Throughput"].mean(),
        "Reliability": state_counts["Reliable"] / num_packets,
        "Delay_Excd_Prob": state_counts["Delay_Exceeded"] / num_packets,
        "Queue_Overflow_Prob": state_counts["QUEUE_OVERFLOW"] / num_packets,
        "Incorrectly_Rcvd_Prob": state_counts["RETRY_LIMIT_REACHED"] / num_packets,
    }
    dl_df_list.append(test_case)

# One row per CSV file; written with the default integer index column.
dl_df = pd.DataFrame(dl_df_list)
dl_df.to_csv(save_path + "/{}.csv".format(save_filename))

  0%|          | 3/960 [00:00<00:42, 22.30it/s]

100%|██████████| 960/960 [00:42<00:00, 22.40it/s]


In [21]:
# Compare the probabilities estimated from two different packet budgets and
# report the largest absolute difference for each probability column.
test_df_100000 = pd.read_csv("/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/Test/Test_Dataset_1_uplink_100000_packets.csv")
# NOTE(review): the variable is named `test_df_10000`, but the file loaded is
# the 50000-packet summary — confirm which comparison was intended.
test_df_10000 = pd.read_csv("/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_MultiModulation_Hovering_NoVideo/Test/Test_Dataset_1_uplink_50000_packets.csv")

# Series subtraction pairs rows by index label, so this presumably relies on
# both files listing the test cases in the same order — TODO confirm.
# Series.max() is used instead of the builtin max() so that a NaN entry cannot
# make the result depend on element order.
abs_err_rel = (test_df_100000["Reliability"] - test_df_10000["Reliability"]).abs()
max_abs_err_rel = abs_err_rel.max()
idx_max_abs_err_rel = abs_err_rel.idxmax()

abs_err_incr_rcvd = (test_df_100000["Incorrectly_Rcvd_Prob"] - test_df_10000["Incorrectly_Rcvd_Prob"]).abs()
max_abs_err_incr_rcvd = abs_err_incr_rcvd.max()
idx_max_abs_err_incr_rcvd = abs_err_incr_rcvd.idxmax()

abs_err_queue_overflow = (test_df_100000["Queue_Overflow_Prob"] - test_df_10000["Queue_Overflow_Prob"]).abs()
max_abs_err_queue_overflow = abs_err_queue_overflow.max()
idx_max_abs_err_queue_overflow = abs_err_queue_overflow.idxmax()

abs_err_delay_excd = (test_df_100000["Delay_Excd_Prob"] - test_df_10000["Delay_Excd_Prob"]).abs()
max_abs_err_delay_excd = abs_err_delay_excd.max()
idx_max_abs_err_delay_excd = abs_err_delay_excd.idxmax()

print(max_abs_err_rel)
print(max_abs_err_incr_rcvd)
print(max_abs_err_queue_overflow)
print(max_abs_err_delay_excd)

0.0
0.0
0.0
0.0
