### Preparing Test Dataset

The main purpose of this notebook is to generate the pickle file containing the test dataset that we use as input to our main training script.

In [1]:
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Set the path to the data (a local directory, not a URL)
data_path = './ITU_dataset/'             ## Path to where data is stored
output_train_sim = data_path  # The simulator output file is read from this same directory

The training files (output file) include the information of each simulation as follows:

1. Header line, indicating the name of the input file used for the simulation (the name of the input file contains the OBSS/PD-based threshold used in each case)

2. Array with the throughput (in Mbps) obtained by each STA of the BSS of interest

3. Array with the interference (in dBm) sensed by the AP of interest, from all the other APs.

4. Array with the RSSI (in dBm) received by each STA of the BSS of interest, from its corresponding AP.

5. Array with the average SINR (in dB) experienced by each STA of the BSS of interest.


In [2]:
# First we read the test data

test_file_root_name = 'output_11ax_sr_simulations_test.txt'
column_names = ["input_file_name"]


# Load the raw output file as one DataFrame row per text line.
# pd.read_csv(sep="\n") is rejected by current pandas versions, so the lines are
# read directly; blank lines are skipped, as read_csv does by default.
with open(data_path + test_file_root_name, 'r', encoding='utf-8') as raw_file:
    raw_lines = [raw_line.rstrip("\n") for raw_line in raw_file if raw_line.rstrip("\n") != ""]
output_data_test_raw = pd.DataFrame({"raw_data": raw_lines})

# Every simulation occupies 5 consecutive lines; the header is the first of them
data_test = pd.DataFrame()
for x in range(len(column_names)):
    is_selected_line = (output_data_test_raw.index + x) % 5 == 0
    data_test[column_names[x]] = output_data_test_raw.loc[is_selected_line, "raw_data"].reset_index(drop=True)


# Remove the KOMONDOR SIMULATION string
# All patterns below are literal text, so regex matching is switched off explicitly
# (a lone ')' is not even a valid regular expression).
data_test["input_file_name"] = data_test["input_file_name"].str.replace("KOMONDOR SIMULATION 'sim_", "", regex=False)
# Everything after the first '(' is the seed; n must be passed by keyword in pandas 2
data_test[['input_file_name', 'seed']] = data_test['input_file_name'].str.split('(', n=1, expand=True)
# NOTE(review): whatever preceded '(' (presumably a single space) is deliberately left at the
# end of the name, because read_input_files slices the name with [1:-1] -- confirm before stripping
data_test['input_file_name'] = data_test['input_file_name'].str.replace("'", "", regex=False)
data_test["seed"] = data_test["seed"].str.replace(')', '', regex=False)

data_test

Unnamed: 0,input_file_name,seed
0,input_nodes_test_s000_c-77.csv,seed 1991
1,input_nodes_test_s001_c-77.csv,seed 1991
2,input_nodes_test_s002_c-82.csv,seed 1991
3,input_nodes_test_s003_c-73.csv,seed 1991
4,input_nodes_test_s004_c-70.csv,seed 1991
...,...,...
995,input_nodes_test_s995_c-76.csv,seed 1991
996,input_nodes_test_s996_c-77.csv,seed 1991
997,input_nodes_test_s997_c-74.csv,seed 1991
998,input_nodes_test_s998_c-78.csv,seed 1991


In [3]:
# Split the data based on the context and OBSS/PD information to the dataset
# File names look like "input_nodes_test_s000_c-77.csv": the digits after "_s" are the
# context (scenario) number and the digits after "_c-" are the OBSS/PD value.
# A regular expression is used instead of fixed character offsets so the result does not
# depend on the exact length of the name or on trailing characters.
name_parts = data_test["input_file_name"].str.extract(r"_s(\d+)_c-(\d+)\.csv")
data_test["context"] = name_parts[0].astype(int)
# The OBSS/PD value is stored without its minus sign (e.g. 77 for "c-77")
data_test["OBSS_PD"] = name_parts[1].astype(int)
data_test

Unnamed: 0,input_file_name,seed,context,OBSS_PD
0,input_nodes_test_s000_c-77.csv,seed 1991,0,77
1,input_nodes_test_s001_c-77.csv,seed 1991,1,77
2,input_nodes_test_s002_c-82.csv,seed 1991,2,82
3,input_nodes_test_s003_c-73.csv,seed 1991,3,73
4,input_nodes_test_s004_c-70.csv,seed 1991,4,70
...,...,...,...,...
995,input_nodes_test_s995_c-76.csv,seed 1991,995,76
996,input_nodes_test_s996_c-77.csv,seed 1991,996,77
997,input_nodes_test_s997_c-74.csv,seed 1991,997,74
998,input_nodes_test_s998_c-78.csv,seed 1991,998,78


In [4]:
# Helper Functions for reading the dataset
def _to_float_array(raw_values):
    """Turn a comma-separated string into a 1-D NumPy array of floats.

    Raises ValueError when any field cannot be parsed as a number.
    """
    # The builtin float is used as dtype: the np.float alias was removed from NumPy
    return np.array(raw_values.split(',')).astype(float)


def _take_manual_correction(corrections, correction_index, field_name, line_number):
    """Return the hard-coded replacement for the next corrupted ``field_name`` line.

    Raises IndexError with an explanatory message once every replacement is used up.
    """
    if correction_index >= len(corrections):
        raise IndexError(
            "No manual %s correction left for the corrupted line %d" % (field_name, line_number)
        )
    return corrections[correction_index]


def read_output_simulator(fp, dataset_lenght):
    """Parse the simulator output file into per-simulation lists.

    The file is consumed as consecutive 5-line records: a header line followed
    by the throughput, interference, RSSI and SINR lines, in that order.

    Parameters
    ----------
    fp : file-like object
        Open text handle exposing ``readlines()``.
    dataset_lenght : int
        Number of 5-line records to read.

    Returns
    -------
    tuple of lists
        ``(RSSI_list, SINR_list, interference_list, throughput_list)`` with one
        entry per record. An entry is a float array when the line contains
        commas, otherwise a single float.

    Raises
    ------
    IndexError
        If the file holds fewer complete records than ``dataset_lenght``, or if
        more corrupted RSSI/SINR lines are met than manual corrections exist.
    ValueError
        If a line that has no fallback (interference, comma-separated
        throughput, single-value RSSI/SINR) is not numeric.
    """
    RSSI_list = []
    SINR_list = []
    interference_list = []
    throughput_list = []
    # To manually fix bugs in the dataset (used in order, one per corrupted RSSI line)
    RSSI_bugs = [np.array([-80.77,-81.19,-65.59]), np.array([-61.77,-62.19,-46.59]), np.array([-61.77,-62.19,-46.59]),
                np.array([-61.77,-62.19,-46.59]), np.array([-61.77,-62.19,-46.59]), np.array([-61.77,-62.19,-46.59]),
                np.array([-61.77,-62.19,-46.59]), np.array([-61.77,-62.19,-46.59])]
    RSSI_bug_index = 0

    # To manually fix bugs in the dataset (used in order, one per corrupted SINR line)
    SINR_bugs = [np.array([35.05, 33.56]), np.array([35.16, 30.83]), np.array([33.95, 34.76, 39.32]), np.array([32.91,31.65,47.57]),
                np.array([32.89,31.68,47.43]), np.array([32.92,31.72,47.38]), np.array([32.69,31.58,47.12]) , np.array([32.73,31.56,47.44]),
                np.array([32.48,31.67,47.61]), np.array([32.74,31.67,47.27]), np.array([32.74,31.61,47.59]), np.array([32.91,31.67,47.61])]
    SINR_bug_index = 0

    lines = fp.readlines()
    lines_per_record = 5
    line_index = 0
    for index in range(dataset_lenght):
        # Fail early, with a clear message, when the file is shorter than requested
        if line_index + lines_per_record > len(lines):
            raise IndexError(
                "The output file ends before record %d is complete (record starts at line index %d)"
                % (index, line_index)
            )

        # Header line (name of the scenario): not needed here, just skipped
        line_index += 1

        # Throughput
        throughput = lines[line_index].strip()         # Remove \n ch
        line_index += 1
        if "," in throughput:
            throughput = _to_float_array(throughput)
        else:
            try:
                throughput = float(throughput)
            except ValueError:
                # A non-numeric single value is reported and stored as 0.0
                print("There is an throughput data bug at line ", line_index, "raw data is",  throughput)
                print(throughput)
                throughput = 0.0
        throughput_list.append(throughput)

        # Interferences (no fallback: a malformed line raises ValueError)
        interference = lines[line_index].strip()     # Remove \n ch
        line_index += 1
        if "," in interference:
            interference = _to_float_array(interference)
        else:
            interference = float(interference)
        interference_list.append(interference)

        # RSSI
        RSSI = lines[line_index].strip()         # Remove \n ch
        line_index += 1
        if "," in RSSI:
            try:
                RSSI = _to_float_array(RSSI)
            except ValueError:
                print("There is an RSSI data bug at line ",line_index, "raw data is",  RSSI)
                RSSI = _take_manual_correction(RSSI_bugs, RSSI_bug_index, "RSSI", line_index)
                RSSI_bug_index += 1
        else:
            RSSI = float(RSSI)
        RSSI_list.append(RSSI)

        # SINR
        SINR = lines[line_index].strip()         # Remove \n ch
        line_index += 1
        if "," in SINR:
            try:
                SINR = _to_float_array(SINR)
            except ValueError:
                print("There is an SINR data bug at line ",line_index, "raw data is",  SINR)
                SINR = _take_manual_correction(SINR_bugs, SINR_bug_index, "SINR", line_index)  # Push in manual corrections
                SINR_bug_index += 1
        else:
            SINR = SINR.replace('f', '')  # As there is a bug in the dataset
            SINR = float(SINR)
        SINR_list.append(SINR)

    return( RSSI_list, SINR_list, interference_list, throughput_list)


def read_input_files(input_dataset_path, input_dataset_names_list):
    """
    Collect the node type and the x/y position columns of every simulator input file.

    Each entry of ``input_dataset_names_list`` is stripped of its first and last
    character and appended to ``input_dataset_path`` to form the path of a
    ';'-separated CSV file. Only the three columns used so far are loaded.

    Returns three lists (node types, x positions, y positions) holding one
    inner list per input file, in the order the names were given.

    Columns available in the input files:

    ['node_code', 'node_type', 'wlan_code', 'x(m)', 'y(m)', 'z(m)', 'central_freq(GHz)',
    'channel_bonding_model', 'primary_channel', 'min_channel_allowed', 'max_channel_allowed',
    'tpc_default(dBm)', 'cca_default(dBm)', 'traffic_model', 'traffic_load[pkt/s]',
    'packet_length', 'num_packets_aggregated', 'capture_effect_model',
    'capture_effect_thr', 'constant_per', 'pifs_activated', 'cw_adaptation',
    'cont_wind', 'cont_wind_stage', 'bss_color', 'spatial_reuse_group',
    'non_srg_obss_pd', 'srg_obss_pd']

    """
    wanted_columns = ['node_type', 'x(m)', 'y(m)']
    values_per_column = {column: [] for column in wanted_columns}

    for raw_name in input_dataset_names_list:
        # Drop the first and the last character of the name before building the path
        csv_path = input_dataset_path + raw_name[1:-1]
        nodes = pd.read_csv(csv_path, sep = ';', usecols=wanted_columns)
        for column in wanted_columns:
            values_per_column[column].append(nodes[column].tolist())

    return values_per_column['node_type'], values_per_column['x(m)'], values_per_column['y(m)']

In [6]:
# ADD RSSI, Interference, SNR, and Throughput to the dataframe
output_train_sim_test = output_train_sim + test_file_root_name

# Open the file in a with-block so the handle is closed as soon as parsing is done
with open(output_train_sim_test, 'r') as fp_test:
    RSSI, SINR, interference, throughput = read_output_simulator(fp_test, len(data_test))
data_test["RSSI"] = RSSI
data_test["SINR"] = SINR
data_test["interference"] = interference
data_test["throughput"] = throughput
data_test


Unnamed: 0,input_file_name,seed,context,OBSS_PD,RSSI,SINR,interference,throughput
0,input_nodes_test_s000_c-77.csv,seed 1991,0,77,"[-60.39, -58.99, -60.87]","[34.02, 35.61, 33.87]","[-108.81, -106.41, -70.14]",0.0
1,input_nodes_test_s001_c-77.csv,seed 1991,1,77,"[-55.85, -52.74]","[35.86, 39.36]","[-151.43, -82.51, -50.02, -104.25]",0.0
2,input_nodes_test_s002_c-82.csv,seed 1991,2,82,"[-51.95, -56.32]","[41.68, 37.76]","[-112.98, -56.95, -68.93, -69.41, -102.28]",0.0
3,input_nodes_test_s003_c-73.csv,seed 1991,3,73,"[-56.98, -59.23]","[31.43, 27.75]","[-98.52, -58.83, -142.87, -85.4, -116.4]",0.0
4,input_nodes_test_s004_c-70.csv,seed 1991,4,70,"[-60.08, -61.44, -62.08, -41.75]","[34.13, 33.39, 32.81, 53.01]","[-127.9, -67.51, -43.13]",0.0
...,...,...,...,...,...,...,...,...
995,input_nodes_test_s995_c-76.csv,seed 1991,995,76,"[-61.44, -60.17, -44.94]","[33.56, 34.83, 50.06]",-132.84,0.0
996,input_nodes_test_s996_c-77.csv,seed 1991,996,77,"[-62.83, -56.71, -66.33]","[25.3, 31.1, 23.11]",-89.17,0.0
997,input_nodes_test_s997_c-74.csv,seed 1991,997,74,"[-57.92, -54.5, -44.7, -51.26]","[35.91, 38.97, 47.14, 42.45]","[-94.75, -104.05, -93.52]",0.0
998,input_nodes_test_s998_c-78.csv,seed 1991,998,78,"[-59.54, -62.42, -61.14]","[11.19, 1.56, 2.54]","[-47.31, -62.99]",0.0


In [8]:
# Attach the node types and positions from the simulator input files to the dataframe.
# This is slow (one CSV file is read per row) and could be made more efficient.
# read_input_files drops the first and last character of every file name, so the
# prefix below ends with the leading 'i' that the slice removes.
input_dataset_path_test = data_path + 'simulator_input_files_test/i'

node_types, x_positions, y_postions = read_input_files(
    input_dataset_path_test,
    data_test['input_file_name'].tolist(),
)
(
    data_test["node_type"],
    data_test["x(m)"],
    data_test["y(m)"],
) = node_types, x_positions, y_postions


In [9]:
# Display the dataframe after the node_type, x(m) and y(m) columns were added
data_test

Unnamed: 0,input_file_name,seed,context,OBSS_PD,RSSI,SINR,interference,throughput,node_type,x(m),y(m)
0,input_nodes_test_s000_c-77.csv,seed 1991,0,77,"[-60.39, -58.99, -60.87]","[34.02, 35.61, 33.87]","[-108.81, -106.41, -70.14]",0.0,"[0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1]","[59.6921, 61.6532, 67.746, 68.0925, 18.7059, 1...","[44.7691, 36.1712, 44.6847, 48.2394, 14.0294, ..."
1,input_nodes_test_s001_c-77.csv,seed 1991,1,77,"[-55.85, -52.74]","[35.86, 39.36]","[-151.43, -82.51, -50.02, -104.25]",0.0,"[0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, ...","[0.8129, 2.8465, 5.6986, 79.9533, 75.3193, 81....","[0.6097, 6.7711, 2.2166, 59.9649, 64.9153, 50...."
2,input_nodes_test_s002_c-82.csv,seed 1991,2,82,"[-51.95, -56.32]","[41.68, 37.76]","[-112.98, -56.95, -68.93, -69.41, -102.28]",0.0,"[0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, ...","[14.3123, 11.0064, 8.6821, 58.862, 56.3002, 64...","[10.7342, 14.2652, 7.0855, 44.1465, 37.3532, 4..."
3,input_nodes_test_s003_c-73.csv,seed 1991,3,73,"[-56.98, -59.23]","[31.43, 27.75]","[-98.52, -58.83, -142.87, -85.4, -116.4]",0.0,"[0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, ...","[79.4459, 77.6251, 84.296, 46.9751, 50.0455, 4...","[59.5844, 66.3731, 52.9952, 35.2313, 26.7568, ..."
4,input_nodes_test_s004_c-70.csv,seed 1991,4,70,"[-60.08, -61.44, -62.08, -41.75]","[34.13, 33.39, 32.81, 53.01]","[-127.9, -67.51, -43.13]",0.0,"[0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1]","[60.9108, 68.5195, 63.0799, 55.0615, 59.4391, ...","[45.6831, 41.5853, 36.5205, 37.8326, 44.3601, ..."
...,...,...,...,...,...,...,...,...,...,...,...
995,input_nodes_test_s995_c-76.csv,seed 1991,995,76,"[-61.44, -60.17, -44.94]","[33.56, 34.83, 50.06]",-132.84,0.0,"[0, 1, 1, 1, 0, 1, 1]","[14.749, 5.3724, 6.4501, 17.1472, 76.843, 79.7...","[11.0617, 11.8923, 8.4691, 12.2197, 57.6323, 6..."
996,input_nodes_test_s996_c-77.csv,seed 1991,996,77,"[-62.83, -56.71, -66.33]","[25.3, 31.1, 23.11]",-89.17,0.0,"[0, 1, 1, 1, 0, 1, 1]","[54.1804, 61.8106, 58.9991, 62.6578, 79.3154, ...","[40.6353, 38.3345, 42.402, 35.4386, 59.4865, 5..."
997,input_nodes_test_s997_c-74.csv,seed 1991,997,74,"[-57.92, -54.5, -44.7, -51.26]","[35.91, 38.97, 47.14, 42.45]","[-94.75, -104.05, -93.52]",0.0,"[0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, ...","[62.2739, 69.3201, 61.0139, 61.3009, 63.5181, ...","[46.7054, 44.1464, 52.4484, 44.2884, 51.1091, ..."
998,input_nodes_test_s998_c-78.csv,seed 1991,998,78,"[-59.54, -62.42, -61.14]","[11.19, 1.56, 2.54]","[-47.31, -62.99]",0.0,"[0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1]","[30.8932, 39.1703, 20.8994, 29.3953, 33.5248, ...","[23.1699, 22.1055, 23.354, 14.0495, 25.1436, 2..."


In [10]:
# Save to pickle
# Create the output folder first: to_pickle does not create missing directories
os.makedirs('cleaned_dataset', exist_ok=True)
data_test.to_pickle('cleaned_dataset/test.pkl')

In [11]:
# Reload the pickle written by the previous cell and display it as a round-trip check
dataset_test = pd.read_pickle('cleaned_dataset/test.pkl')
dataset_test

Unnamed: 0,input_file_name,seed,context,OBSS_PD,RSSI,SINR,interference,throughput,node_type,x(m),y(m)
0,input_nodes_test_s000_c-77.csv,seed 1991,0,77,"[-60.39, -58.99, -60.87]","[34.02, 35.61, 33.87]","[-108.81, -106.41, -70.14]",0.0,"[0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1]","[59.6921, 61.6532, 67.746, 68.0925, 18.7059, 1...","[44.7691, 36.1712, 44.6847, 48.2394, 14.0294, ..."
1,input_nodes_test_s001_c-77.csv,seed 1991,1,77,"[-55.85, -52.74]","[35.86, 39.36]","[-151.43, -82.51, -50.02, -104.25]",0.0,"[0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, ...","[0.8129, 2.8465, 5.6986, 79.9533, 75.3193, 81....","[0.6097, 6.7711, 2.2166, 59.9649, 64.9153, 50...."
2,input_nodes_test_s002_c-82.csv,seed 1991,2,82,"[-51.95, -56.32]","[41.68, 37.76]","[-112.98, -56.95, -68.93, -69.41, -102.28]",0.0,"[0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, ...","[14.3123, 11.0064, 8.6821, 58.862, 56.3002, 64...","[10.7342, 14.2652, 7.0855, 44.1465, 37.3532, 4..."
3,input_nodes_test_s003_c-73.csv,seed 1991,3,73,"[-56.98, -59.23]","[31.43, 27.75]","[-98.52, -58.83, -142.87, -85.4, -116.4]",0.0,"[0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, ...","[79.4459, 77.6251, 84.296, 46.9751, 50.0455, 4...","[59.5844, 66.3731, 52.9952, 35.2313, 26.7568, ..."
4,input_nodes_test_s004_c-70.csv,seed 1991,4,70,"[-60.08, -61.44, -62.08, -41.75]","[34.13, 33.39, 32.81, 53.01]","[-127.9, -67.51, -43.13]",0.0,"[0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1]","[60.9108, 68.5195, 63.0799, 55.0615, 59.4391, ...","[45.6831, 41.5853, 36.5205, 37.8326, 44.3601, ..."
...,...,...,...,...,...,...,...,...,...,...,...
995,input_nodes_test_s995_c-76.csv,seed 1991,995,76,"[-61.44, -60.17, -44.94]","[33.56, 34.83, 50.06]",-132.84,0.0,"[0, 1, 1, 1, 0, 1, 1]","[14.749, 5.3724, 6.4501, 17.1472, 76.843, 79.7...","[11.0617, 11.8923, 8.4691, 12.2197, 57.6323, 6..."
996,input_nodes_test_s996_c-77.csv,seed 1991,996,77,"[-62.83, -56.71, -66.33]","[25.3, 31.1, 23.11]",-89.17,0.0,"[0, 1, 1, 1, 0, 1, 1]","[54.1804, 61.8106, 58.9991, 62.6578, 79.3154, ...","[40.6353, 38.3345, 42.402, 35.4386, 59.4865, 5..."
997,input_nodes_test_s997_c-74.csv,seed 1991,997,74,"[-57.92, -54.5, -44.7, -51.26]","[35.91, 38.97, 47.14, 42.45]","[-94.75, -104.05, -93.52]",0.0,"[0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, ...","[62.2739, 69.3201, 61.0139, 61.3009, 63.5181, ...","[46.7054, 44.1464, 52.4484, 44.2884, 51.1091, ..."
998,input_nodes_test_s998_c-78.csv,seed 1991,998,78,"[-59.54, -62.42, -61.14]","[11.19, 1.56, 2.54]","[-47.31, -62.99]",0.0,"[0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1]","[30.8932, 39.1703, 20.8994, 29.3953, 33.5248, ...","[23.1699, 22.1055, 23.354, 14.0495, 25.1436, 2..."
