In [1]:
import csv
import pandas as pd # for data manipulation 
import numpy as np
import os, sys, glob, math, pickle
from tqdm import tqdm

# This function helps to calculate probability distribution, which goes into BBN (note, can handle up to 2 parents)
def cpt_probs(df, child, parents):
    """Return a row-normalised contingency table of ``child`` against ``parents``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the ``child`` column and every column named in ``parents``.
    child : str
        Column whose values become the columns of the table.
    parents : list of str
        Columns whose values become the row keys of the table.

    Returns
    -------
    pandas.DataFrame or None
        The crosstab normalised over each row (``normalize='index'``) and
        sorted by its index. If anything raises while building it, the error
        is printed and ``None`` is returned instead.
    """
    try:
        # One Series per parent; pd.crosstab takes them as the row keys.
        parent_series = []
        for name in parents:
            parent_series.append(df[name])

        table = pd.crosstab(
            parent_series,
            df[child],
            rownames=parents,
            colnames=[child],
            margins=False,
            normalize='index',
            dropna=False,
        )
        return table.sort_index()
    except Exception as err:
        # Best-effort: report the problem and let the caller handle None.
        print(err)
        return None

def euclidean_dist(row):
    """Return sqrt(U2G_Distance**2 - Height**2) for one row.

    ``row`` is anything indexable by the keys ``"U2G_Distance"`` and
    ``"Height"`` (e.g. a DataFrame row). With plain Python numbers,
    ``math.sqrt`` raises ``ValueError`` when ``Height`` exceeds
    ``U2G_Distance`` in magnitude, because the radicand is then negative.
    """
    distance = row["U2G_Distance"]
    height = row["Height"]
    return math.sqrt(distance**2 - height**2)

Test compilation of the dataset using pandas (CPU)

In [11]:
# Directory holding the processed per-scenario CSV files.
processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_BPSK_6-5Mbps/Dataset_NP10000_BPSK_6-5Mbps_8UAVs_processed"

# Compile every downlink CSV into a single pandas DataFrame.
dl_df_list = []
downlink_csvs = glob.glob(processed_data_path + "/*_downlink.csv")
for csv_file in tqdm(downlink_csvs):
    df = pd.read_csv(csv_file)
    e2e_delay = df["Delay"].to_numpy()
    # Jitter is the change in delay between consecutive rows of one file;
    # the first row has no predecessor, so a 0 is inserted in front.
    jitter = np.insert(np.diff(e2e_delay), 0, 0)
    df["Jitter"] = jitter
    dl_df_list.append(df)

dl_df = pd.concat(dl_df_list, ignore_index=True)
# Row-wise pass over the combined frame to derive U2G_H_Dist.
dl_df["U2G_H_Dist"] = dl_df.apply(euclidean_dist, axis=1)
# dl_df.to_csv(processed_data_path + "_test_pandas.csv", index=False)

100%|██████████| 125/125 [00:13<00:00,  9.43it/s]


In [10]:
# cudf variant of the dataset compilation: each CSV is read with cudf,
# extended with the Jitter and U2G_H_Dist columns, converted to pandas,
# and the per-file frames are concatenated at the end.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# top import cell so a fresh-kernel run shows all dependencies up front.
import cudf
# Presumably switches cudf to CUDA managed (unified) memory -- TODO confirm
# against the installed cudf version.
cudf.set_allocator("managed")
processed_data_path = "/media/research-student/One Touch/FANET Datasets/Dataset_NP10000_BPSK_6-5Mbps/Dataset_NP10000_BPSK_6-5Mbps_8UAVs_processed"
# Process the per-file CSVs into one DataFrame.
# NOTE(review): the original comment and the name `downlink_csvs` say
# "downlink", but the glob pattern below matches `*_uplink.csv` -- confirm
# which link direction is intended.
dl_df_list = []
downlink_csvs = glob.glob(processed_data_path + "/*_uplink.csv")
for csv_file in tqdm(downlink_csvs):
    # Only the columns listed in `usecols` are read here, unlike the
    # pandas cell, which loads every column.
    df = cudf.read_csv(csv_file, 
                    usecols = ['U2G_Distance', 'Height', "Num_Members", "Mean_Sending_Interval","Bytes", "U2G_SINR", "U2G_BER", 
                               "Delay", "Throughput", "Queueing_Time", "Packet_State", "Retry_Count", "Incorrectly_Received", "Queue_Overflow"])
    # Jitter is computed with NumPy on the array returned by to_numpy():
    # difference of consecutive delays, with 0 inserted for the first row.
    e2e_delay = df["Delay"].to_numpy()
    jitter = e2e_delay[1:] - e2e_delay[0:-1]
    jitter = np.insert(jitter,0,0)
    df["Jitter"] = jitter
    # Applied per file on the cudf frame (the pandas cell applies it once
    # on the concatenated frame instead).
    df["U2G_H_Dist"] = df.apply(euclidean_dist, axis=1)
    # Convert to pandas before collecting, so the final concat is pd.concat.
    dl_df_list.append(df.to_pandas())
dl_df = pd.concat(dl_df_list, ignore_index=True)
# dl_df.to_csv(processed_data_path + "_test_pandas.csv", index=False)

100%|██████████| 125/125 [00:14<00:00,  8.61it/s]


In [11]:
# Total bytes reported for `df`, i.e. whichever per-file frame the most
# recently executed loading cell left bound to that name.
df.memory_usage().sum()

54051243

In [12]:
# Total bytes reported for the concatenated frame `dl_df`.
dl_df.memory_usage().sum()

6402750848

In [18]:
# Write the combined frame to "<processed_data_path>_test_pandas.h5" under
# the HDF key '8_UAVs'.
# NOTE(review): the truncated `check_attribute_name(name)` output below looks
# like PyTables' NaturalNameWarning, presumably because the key starts with a
# digit -- confirm before relying on attribute-style access to this key.
dl_df.to_hdf(processed_data_path + "_test_pandas.h5",  key='8_UAVs')

  check_attribute_name(name)


In [3]:
# Preview the first rows of the combined frame.
dl_df.head()

Unnamed: 0,index,RxTime,TxTime,Packet_Name,Bytes,RSSI,U2G_SINR,U2U_SINR,U2G_BER,U2U_BER,...,Incorrectly_Received,Queue_Overflow,Packet_State,Throughput,Height,Inter_UAV_Distance,Num_Members,Mean_Sending_Interval,Jitter,U2G_H_Dist
0,38150,0.001026,0.0,CNCData-0,103,6.05744e-07,5.95662,241150.0,0.000278682,0.0,...,0.0,0.0,Reliable,317.0,120,5,7,521,0.0,0.0
1,0,0.429328,0.429,CNCData-1,169,3.55206e-10,140.41,,2.4884e-63,,...,0.0,0.0,Reliable,317.0,120,5,7,521,-0.000698,0.0
2,27153,0.911572,0.911,CNCData-2,45,2.47617e-08,133.228,9856.83,3.35962e-60,0.0,...,0.0,0.0,Reliable,317.0,120,5,7,521,0.000244,0.0
3,21772,1.465868,1.465,CNCData-3,100,2.7075e-07,36.887,107786.0,4.37984e-18,0.0,...,0.0,0.0,Reliable,348.0,120,5,7,521,0.000296,0.0
4,38151,1.916248,1.915,CNCData-4,248,4.29181e-07,188.207,170859.0,3.7540800000000003e-84,0.0,...,0.0,0.0,Reliable,348.0,120,5,7,521,0.00038,0.0
