In [26]:
import numpy as np
import keras
import math
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Ref: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that expands per-row packet counts into per-packet samples.

    Every row of ``dataset_details_df`` carries four input features and the number
    of packets (out of ``Num_Sent``) that ended in each of the four packet states.
    A packet is addressed by a global index in ``[0, sum(Num_Sent))``; within a row
    the packets are laid out state by state in the order of ``_COUNT_COLUMNS``.

    Parameters
    ----------
    dataset_details_df : pandas.DataFrame
        Must contain ``Num_Sent`` plus the columns in ``_FEATURE_COLUMNS`` and
        ``_COUNT_COLUMNS``. The feature columns must already be numeric. The frame
        is read once at construction time; later changes to it are not picked up.
    test_split : float
        Fraction of the packets held out for testing. ``0`` or ``1`` disables the
        split and the generator serves the whole dataset.
    type : str
        ``'train'`` or ``'test'`` (case-insensitive); selects which side of the
        split this generator serves.
    batch_size : int
        Number of samples per batch (the last batch may be smaller).
    input_dim : int
        Width of X; must match the number of feature columns (4).
    n_classes : int
        Number of one-hot classes in y.
    shuffle : {'all', 'row', None}
        ``'all'`` shuffles across the whole dataset, ``'row'`` shuffles only within
        each DataFrame row, anything else keeps the natural order.
    """

    # Model inputs, in the order they are written into X.
    _FEATURE_COLUMNS = ["Mean_SINR", "Std_Dev_SINR", "UAV_Sending_Interval", "Modulation"]
    # Per-state packet counts, ordered to match the categorical encoding
    # {"Reliable":0, "QUEUE_OVERFLOW":1, "RETRY_LIMIT_REACHED":2, "Delay_Exceeded":3}.
    _COUNT_COLUMNS = ["Num_Reliable", "Num_Q_Overflow", "Num_Incr_Rcvd", "Num_Delay_Excd"]

    def __init__(self, dataset_details_df, test_split=0.2, type='train', batch_size=32, input_dim=4, n_classes=4, shuffle=None):
        """Store the configuration, snapshot the DataFrame and build the first index set."""
        assert isinstance(type, str) and type.lower() in ("train", "test"), "Type needs to be either 'train'/'test'"
        super().__init__()
        self.input_dim = input_dim
        self.batch_size = batch_size
        self.dataset_details_df = dataset_details_df
        self.n_classes = n_classes
        self.shuffle = shuffle
        # Normalised to lower case so 'Train'/'Test' take the same code path as
        # 'train'/'test' (previously they passed validation but crashed later).
        self.type = type.lower()

        num_sent = self.dataset_details_df["Num_Sent"]
        self.dataset_len = num_sent.sum()
        # cumulative_index[r] is one past the last packet index of row r; used to
        # map a packet index back to its DataFrame row.
        self.cumulative_index = num_sent.cumsum(axis=0).values
        # First packet index of every row.
        self._row_start = self.cumulative_index - num_sent.values
        # Positional (not label-based) snapshots, so a non-default DataFrame index
        # cannot send a packet to the wrong row.
        self._features = self.dataset_details_df[self._FEATURE_COLUMNS].to_numpy(dtype=float)
        # Within-row offsets at which the packet state switches from class k to k+1.
        self._state_thresholds = np.cumsum(
            self.dataset_details_df[self._COUNT_COLUMNS[:3]].to_numpy(), axis=1)

        # Held-out fraction exactly as supplied by the caller.
        self._holdout_fraction = test_split
        # Kept for backward compatibility: the fraction of packets this generator
        # does NOT serve (test_split for 'train', 1 - test_split for 'test').
        self.test_split = test_split if self.type == 'train' else 1 - test_split

        self.indexes = np.array([])  # Must exist before on_epoch_end() runs
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (last batch may be partial)."""
        return int(np.ceil(len(self.indexes) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``."""
        batch_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        return self.__data_generation(batch_indexes)

    def _select(self, candidates, shuffle):
        """Return the part of ``candidates`` that belongs to this generator.

        Without a split (fraction 0 or 1) all candidates are returned, shuffled in
        place when ``shuffle`` is true. Otherwise the candidates are partitioned
        once with a fixed seed, and the train generator gets the first partition
        while the test generator gets the complementary one, so the two never
        share a packet.
        """
        if self.test_split == 0 or self.test_split == 1 or len(candidates) == 0:
            if shuffle:
                np.random.shuffle(candidates)
            return candidates
        train_part, test_part = train_test_split(
            candidates, test_size=self._holdout_fraction, random_state=0, shuffle=shuffle)
        return train_part if self.type == 'train' else test_part

    def on_epoch_end(self):
        """Rebuild ``self.indexes`` according to the shuffle mode.

        NOTE: when a split is active the partition uses ``random_state=0``, so the
        resulting order is the same every epoch; only the unsplit modes reshuffle.
        """
        if self.shuffle == 'all':
            self.indexes = self._select(np.arange(self.dataset_len), shuffle=True)
        elif self.shuffle == 'row':
            # Shuffle (and split) within each DataFrame row only, then concatenate
            # once instead of growing the array row by row.
            per_row = [self._select(np.arange(start, stop), shuffle=True)
                       for start, stop in zip(self._row_start, self.cumulative_index)]
            self.indexes = np.concatenate(per_row) if per_row else np.array([], dtype=int)
        else:
            # Defaults to no shuffle
            self.indexes = self._select(np.arange(self.dataset_len), shuffle=False)

    def __data_generation(self, indexes):
        """Build ``(X, y)`` for the given global packet indexes.

        X has shape ``(len(indexes), input_dim)``; y is the one-hot packet state
        with ``n_classes`` columns.
        """
        indexes = np.asarray(indexes, dtype=np.int64)
        # Packet indexes start at 0, hence side='right': index == cumulative_index[r]
        # already belongs to row r+1.
        row_index = np.searchsorted(self.cumulative_index, indexes, side='right')
        # Position of each packet inside its own row.
        packet_state_index = indexes - self._row_start[row_index]
        # The class is the number of state boundaries at or below that position:
        # 0 = reliable, 1 = queue overflow, 2 = incr rcvd, 3 = delay exceeded.
        packet_state = (packet_state_index[:, None] >= self._state_thresholds[row_index]).sum(axis=1)

        X = np.empty((len(indexes), self.input_dim))
        X[:] = self._features[row_index]

        return X, keras.utils.to_categorical(packet_state, num_classes=self.n_classes)

def normalize_data(df, columns=(), save_details_path=None):
    '''
    Normalize the selected columns of df and optionally record the scaling details.

    df: The pandas DataFrame to normalize. It is modified IN PLACE and also returned.
    columns: The pandas data columns to normalize, given as a list of column names.
             Recognised names: "Height", "U2G_H_Dist", "Mean_SINR", "Std_Dev_SINR",
             "UAV_Sending_Interval", "Packet_State", "Modulation". Others are ignored.
    save_details_path: If not None, directory in which "model_details.txt" is written
                       with the ranges and encodings used.

    Height and U2G_H_Dist are min-max scaled to [-1,1]. The SINR columns are converted
    to dB first and then min-max scaled with the fixed ranges below. The remaining
    columns are mapped through fixed lookup tables.
    '''
    # Define the ranges of parameters
    max_mean_sinr = 10*math.log10(1123) # The max mean SINR calculated at (0,60) is 1122.743643457063 (linear)
    max_std_dev_sinr = 10*math.log10(466) # The max std dev SINR calculated at (0,60) is 465.2159856885714 (linear)
    min_mean_sinr = 10*math.log10(0.2) # The min mean SINR calculated at (1200,60) is 0.2251212887895188 (linear)
    min_std_dev_sinr = 10*math.log10(0.7) # The min std dev SINR calculated at (1200,300) is 0.7160093126585219 (linear)
    max_height = 300
    min_height = 60
    max_h_dist = 1200
    min_h_dist = 0

    # Lookup tables for the categorical / discrete columns. Kept in one place so the
    # values applied to df and the values written to the details file cannot diverge.
    sending_interval_map = {10:-1, 20:-0.5, 40:0, 66.7: 0.5, 100:1, 1000:2}
    packet_state_map = {"Reliable":0, "QUEUE_OVERFLOW":1, "RETRY_LIMIT_REACHED":2, "Delay_Exceeded":3}
    modulation_map = {"BPSK":1, "QPSK":0.3333, 16:-0.3333, "QAM-16":-0.3333, "QAM16":-0.3333, 64:-1, "QAM-64":-1, "QAM64":-1}

    # Normalize data (Min Max Normalization between [-1,1])
    if "Height" in columns:
        df["Height"] = df["Height"].apply(lambda x: 2*(x-min_height)/(max_height-min_height) - 1)
    if "U2G_H_Dist" in columns:
        df["U2G_H_Dist"] = df["U2G_H_Dist"].apply(lambda x: 2*(x-min_h_dist)/(max_h_dist-min_h_dist) - 1)
    if "Mean_SINR" in columns:
        df["Mean_SINR"] = df["Mean_SINR"].apply(lambda x: 2*(10*math.log10(x)-min_mean_sinr)/(max_mean_sinr-min_mean_sinr) - 1) # Convert to dB space
    if "Std_Dev_SINR" in columns:
        df["Std_Dev_SINR"] = df["Std_Dev_SINR"].apply(lambda x: 2*(10*math.log10(x)-min_std_dev_sinr)/(max_std_dev_sinr-min_std_dev_sinr) - 1) # Convert to dB space
    if "UAV_Sending_Interval" in columns:
        df["UAV_Sending_Interval"] = df["UAV_Sending_Interval"].replace(sending_interval_map)
    if "Packet_State" in columns:
        df['Packet_State'] = df['Packet_State'].replace(packet_state_map)
    if "Modulation" in columns:
        df['Modulation'] = df['Modulation'].replace(modulation_map)

    # Record details of inputs and output for model
    if save_details_path is not None:
        # Rendered from the table actually applied above; the previous hand-written
        # line omitted 66.7 and listed different values for 100 and 1000.
        sending_interval_text = ", ".join("{}:{}".format(k, v) for k, v in sending_interval_map.items())
        # 'with' guarantees the file is closed even if a write fails.
        with open(os.path.join(save_details_path,"model_details.txt"), "w") as f:
            f.write("Max Height (m): {}\n".format(max_height))
            f.write("Min Height (m): {}\n".format(min_height))
            f.write("Max H_Dist (m): {}\n".format(max_h_dist))
            f.write("Min H_Dist (m): {}\n".format(min_h_dist))
            f.write("Max Mean SINR (dB): {}\n".format(max_mean_sinr))
            f.write("Min Mean SINR (dB): {}\n".format(min_mean_sinr))
            f.write("Max Std Dev SINR (dB): {}\n".format(max_std_dev_sinr))
            f.write("Min Std Dev SINR (dB): {}\n".format(min_std_dev_sinr))
            f.write("[BPSK: 1, QPSK: 0.3333, QAM16: -0.3333, QAM64: -1]\n")
            f.write("UAV Sending Interval: [{}]\n".format(sending_interval_text))
            f.write("Output: ['Reliable':0, 'QUEUE_OVERFLOW':1, 'RETRY_LIMIT_REACHED':2, 'Delay_Exceeded':3]\n")

    return df

In [30]:
import pandas as pd
# Location of the dataset-details CSV that the generator expands into samples.
csv_path = "/home/research-student/omnetpp_sim_results/PCAP_Test/ParrotAR2_ConstantSI/test.csv"

# Explicit dtypes for the numeric columns of the CSV.
df_dtypes = {
    "Horizontal_Distance": np.float64,
    "Height": np.int16,
    "U2G_Distance": np.int32,
    "UAV_Sending_Interval": np.float64,
    "Mean_SINR": np.float64,
    "Std_Dev_SINR": np.float64,
    "Num_Sent": np.int32,
    "Num_Reliable": np.int32,
    "Num_Delay_Excd": np.int32,
    "Num_Incr_Rcvd": np.int32,
    "Num_Q_Overflow": np.int32,
}

# Only the model inputs and the packet-count columns are loaded.
dataset_details = pd.read_csv(
    csv_path,
    usecols=[
        "Mean_SINR", "Std_Dev_SINR", "UAV_Sending_Interval", "Modulation",
        "Num_Sent", "Num_Reliable", "Num_Delay_Excd", "Num_Incr_Rcvd", "Num_Q_Overflow",
    ],
    dtype=df_dtypes,
)

# Scale the four input features; no details file is written here.
dataset_details = normalize_data(
    dataset_details,
    columns=["Mean_SINR", "Std_Dev_SINR", "UAV_Sending_Interval", "Modulation"],
    save_details_path=None,
)

# test_split=0 disables the split, so the generator serves the whole dataset,
# shuffled within each row, in batches of 100.
data_generator = DataGenerator(dataset_details, test_split=0, type='train', batch_size=100, shuffle='row')

In [31]:
# Drain the generator batch by batch. Batches are collected in lists and joined
# once at the end; appending to X/y inside the loop re-copies everything gathered
# so far on every iteration.
X_i, y_i = data_generator[0]
X_batches, y_batches = [X_i], [y_i]
for i in tqdm(range(1, len(data_generator))):
    X_i, y_i = data_generator[i]
    X_batches.append(X_i)
    y_batches.append(y_i)
X = np.concatenate(X_batches).reshape(-1,4)
y = np.concatenate(y_batches).reshape(-1,4)

feature_columns = ["Mean_SINR", "Std_Dev_SINR", "UAV_Sending_Interval", "Modulation"]
count_columns = ["Num_Reliable", "Num_Q_Overflow", "Num_Incr_Rcvd", "Num_Delay_Excd"]
df = pd.DataFrame(np.hstack((X,y)), columns=feature_columns + count_columns)

# Rebuild the per-scenario packet counts from the one-hot labels. Grouping uses all
# four features: keying on the two SINR columns alone would merge scenarios that
# share SINR statistics but differ in sending interval or modulation.
df_recon = df.groupby(feature_columns, as_index=False)[count_columns].sum()
df_recon["Num_Sent"] = df_recon[count_columns].sum(axis=1)
# Same column order as before in the written CSV.
df_recon = df_recon[["Mean_SINR", "Std_Dev_SINR", "UAV_Sending_Interval", "Modulation",
                     "Num_Sent", "Num_Reliable", "Num_Delay_Excd", "Num_Incr_Rcvd", "Num_Q_Overflow"]]
df_recon.to_csv("/home/research-student/omnetpp_sim_results/PCAP_Test/ParrotAR2_ConstantSI/test_recon.csv")

  0%|          | 3/2122 [00:00<01:23, 25.34it/s]

100%|██████████| 2122/2122 [01:20<00:00, 26.24it/s]


In [1]:
import pandas as pd
df = pd.read_csv("/home/research-student/omnetpp_sim_results/PCAP_Test/ParrotAR2_ConstantSI/BPSK_Test.csv")

scenario_columns = ["Mean_SINR", "Std_Dev_SINR", "UAV_Sending_Interval", "Modulation"]
# Input column -> name of its per-scenario total in the output.
count_renames = {"Reliable": "Num_Reliable", "Delay_Excd": "Num_Delay_Excd",
                 "Incr_Rcvd": "Num_Incr_Rcvd", "Q_Overflow": "Num_Q_Overflow"}

# Sum the packet-state columns per scenario. Grouping uses all four scenario columns:
# keying on the two SINR columns alone would merge scenarios that share SINR
# statistics but differ in sending interval or modulation.
df_recon = (
    df.groupby(scenario_columns, as_index=False)[list(count_renames)]
    .sum()
    .rename(columns=count_renames)
)
df_recon["Num_Sent"] = df_recon[list(count_renames.values())].sum(axis=1)
# Same column order as before in the written CSV.
df_recon = df_recon[["Mean_SINR", "Std_Dev_SINR", "UAV_Sending_Interval", "Modulation",
                     "Num_Sent", "Num_Reliable", "Num_Delay_Excd", "Num_Incr_Rcvd", "Num_Q_Overflow"]]
df_recon.to_csv("/home/research-student/omnetpp_sim_results/PCAP_Test/ParrotAR2_ConstantSI/BPSK_Generator_Test.csv")

In [10]:
# Read test_recon_training.csv into `df` (rebinds the name used by earlier cells).
# NOTE(review): the recorded output for this cell is a KeyboardInterrupt, so the
# load did not finish in the saved run.
df = pd.read_csv("/home/research-student/omnetpp_sim_results/PCAP_Test/ParrotAR2_ConstantSI/test_recon_training.csv")

KeyboardInterrupt: 

In [5]:
# Number of rows currently held in `df`.
# NOTE(review): execution counts are out of order, so which load produced this
# `df` cannot be told from the notebook alone.
len(df)

22793873