In [1]:
import tensorflow as tf
from keras.models import Model
from keras.optimizers import Adam
from keras import layers, callbacks

from keras.utils import to_categorical

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np

from tensorflow.python.keras.utils.vis_utils import plot_model
import pydot

from scipy.stats import norm
from scipy import stats
import os

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
import pickle
import dataframe_image as dfi

2023-05-09 08:07:15.207285: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-09 08:07:15.233788: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


#### Save parameters

In [2]:
# Number of scans (time steps) taken from each sample; also used as the model's input length
time_data_amount = 20
# Number of target classes (presumably the width of the one-hot labels -- TODO confirm)
nr_classes = 3


### Load the data

In [3]:
# Load every generated CSV sample file together with its matching label file.
# Result: `all_datapoints` is a list of (total_scans_pr_sample, total_channels) arrays and
# `all_labels` is a list of (1, total_channels) arrays, index-aligned with each other.
datapath = '../../../All generated data/'
labelpath = '../../../All generated labels/'

# os.listdir() returns entries in arbitrary, filesystem-dependent order. Sorting makes the
# sample order (and therefore the seeded train/test split further down) reproducible.
# Entries that are not CSV files (hidden files, checkpoint folders, ...) are skipped.
data_list = sorted(name for name in os.listdir(datapath) if name.endswith('.csv'))
#print(data_list)

all_datapoints = []
all_labels = []

total_channels = 79
total_scans_pr_sample = 20
classes = 3


for csv_file in data_list:
    data_file = datapath + csv_file
    current_data_file = pd.read_csv(data_file, header=None)

    # '<name>.csv' -> '<name>_labels.csv'; only the extension is touched
    label_file = labelpath + csv_file[:-len('.csv')] + '_labels.csv'
    current_label_file = pd.read_csv(label_file, header=None)

    n_rows = len(current_data_file.index)
    if len(current_label_file.index) < n_rows:
        raise ValueError(
            'Label file "' + label_file + '" has fewer rows than data file "' + data_file + '"'
        )

    # Each CSV row is one flattened sample: reshape the whole file in one go instead of
    # row by row. Iterating the 3-D block yields one 2-D array per sample.
    data_block = current_data_file.to_numpy().reshape(
        n_rows, total_scans_pr_sample, total_channels
    )
    # One label per channel for every sample; kept as a (1, total_channels) row
    label_block = current_label_file.to_numpy()[:n_rows].reshape(
        n_rows, 1, total_channels
    )

    all_datapoints.extend(data_block)
    all_labels.extend(label_block)


In [4]:
# Sanity check: shape of one label row, then the number of label rows and of data points
# (the two counts are expected to match)
print(all_labels[1].shape, len(all_labels), len(all_datapoints), sep='\n')

(1, 79)
10981
10981


#### Pick out channels for each sample
For now the same set of channels (every third channel, starting at channel 1) is taken from every sample; each chosen channel becomes its own example.

In [5]:
# Every third channel, starting at channel 1 (1, 4, ..., 76)
chosen_channels = list(range(1,78,3))

complete_data = []
complete_labels = []

# Each (sample, channel) pair becomes one example: the first `time_data_amount` scans of
# that channel, paired with that channel's label.
# The loop variables deliberately avoid the name `iter`, which would shadow the builtin
# for the rest of the kernel session.
for datapoint, label_row in zip(all_datapoints, all_labels):
    for channel in chosen_channels:
        complete_data.append(datapoint[0:time_data_amount,channel])
        complete_labels.append(label_row[:,channel])


#quick check to make sure it works
print(complete_data[1].shape)
print(len(complete_data))
print(complete_labels[1].shape)
print(len(complete_labels))

(20,)
285506
(1,)
285506


### Split the data into train and test sets

In [6]:
# 80/20 train/test split with a fixed seed
data_train, data_test, labels_train, labels_test = train_test_split(
    complete_data, complete_labels, train_size=0.8, random_state=112
)

# One hot encoding.
# num_classes is passed explicitly: without it the width is inferred from the largest
# label present in each split, so a split that happens to miss the highest class would
# produce label arrays narrower than the model's output layer.
labels_test = to_categorical(labels_test, num_classes=classes)
labels_train = to_categorical(labels_train, num_classes=classes)

# The split returns Python lists of 1-D arrays; stack them into 2-D arrays.
# (to_categorical already returns arrays, so the labels need no conversion.)
data_train = np.array(data_train)
data_test = np.array(data_test)
print(labels_test.shape)

(57102, 3)


### Normalise the data

In [7]:
# Standardise the inputs: the scaler is fitted on the training data only and the same
# fitted scaler is then applied to the test data.
scaler = preprocessing.StandardScaler()
data_train = scaler.fit_transform(data_train)
data_test = scaler.transform(data_test)

### Helpers for class weights

In [8]:
def generate_class_weights(class_series, multi_class=True, one_hot_encoded=False):
    """
    Generate "balanced" class weights for a set of multi-class or multi-label labels,
    one-hot encoded or not.

    Some examples of different formats of class_series and their outputs are:
      - generate_class_weights(['mango', 'lemon', 'banana', 'mango'], multi_class=True, one_hot_encoded=False)
      {'banana': 1.3333333333333333, 'lemon': 1.3333333333333333, 'mango': 0.6666666666666666}
      - generate_class_weights([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]], multi_class=True, one_hot_encoded=True)
      {0: 0.6666666666666666, 1: 1.3333333333333333, 2: 1.3333333333333333}
      - generate_class_weights([['mango', 'lemon'], ['mango'], ['lemon', 'banana'], ['lemon']], multi_class=False, one_hot_encoded=False)
      {'banana': 1.3333333333333333, 'lemon': 0.4444444444444444, 'mango': 0.6666666666666666}
      - generate_class_weights([[0, 1, 1], [0, 0, 1], [1, 1, 0], [0, 1, 0]], multi_class=False, one_hot_encoded=True)
      {0: 1.3333333333333333, 1: 0.4444444444444444, 2: 0.6666666666666666}

    Args:
      class_series: sized sequence of labels (multi-class) or of label collections /
        indicator rows (multi-label).
      multi_class: True if every sample has exactly one class, False for multi-label.
      one_hot_encoded: True if class_series is already one-hot / indicator encoded.

    Returns:
      A dictionary in the format { class_label: class_weight }. When the input is one-hot
      encoded, class_label is the column index of the class. Otherwise the labels are
      np.unique(class_series) for multi-class input and the binarizer's classes_ for
      multi-label input. An empty class_series yields an empty dictionary. In the
      multi-label case a class that never occurs gets weight 1.

    Author: Angel Igareta (angel@igareta.com)
    """
    # Nothing to weight; this also avoids indexing class_series[0] on empty input below
    if len(class_series) == 0:
        return {}

    if multi_class:
        # Imported here because the notebook's import cell does not provide it
        from sklearn.utils.class_weight import compute_class_weight

        # If class is one hot encoded, transform to categorical labels to use compute_class_weight
        if one_hot_encoded:
            class_series = np.argmax(class_series, axis=1)

        # Compute class weights with sklearn method
        class_labels = np.unique(class_series)
        class_weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=class_series)
        return dict(zip(class_labels, class_weights))

    # Multi-label: the values must be one-hot (indicator) encoded before counting
    mlb = None
    if not one_hot_encoded:
        # Imported here because the notebook's import cell does not provide it
        from sklearn.preprocessing import MultiLabelBinarizer

        mlb = MultiLabelBinarizer()
        class_series = mlb.fit_transform(class_series)

    n_samples = len(class_series)
    n_classes = len(class_series[0])

    # Count in how many samples each class is present (non-zero indicator)
    class_count = [
        sum(1 for row in class_series if row[index] != 0)
        for index in range(n_classes)
    ]

    # Balanced weighting: n_samples / (n_classes * frequency); unseen classes get weight 1
    class_weights = [n_samples / (n_classes * freq) if freq > 0 else 1 for freq in class_count]
    class_labels = range(len(class_weights)) if mlb is None else mlb.classes_
    return dict(zip(class_labels, class_weights))

In [9]:
# Candidate class-weight vectors for the sweep: an unweighted baseline followed by
# 50 random candidates.
# A single seeded generator is used so that re-running the notebook tests the same
# candidates; creating a fresh unseeded generator per draw made the sweep irreproducible.
WEIGHT_SEED = 112
weight_rng = np.random.default_rng(WEIGHT_SEED)

class_weights_list = []
class_weights_list.append(np.array([1.0,1.0,1.0]))
#class_weights_list.append(generate_class_weights(labels_train, multi_class=False, one_hot_encoded=True))

for i in range(50):
    # One weight per class, drawn uniformly from [0.25, 1), [0.5, 1.5) and [0.5, 1.5)
    _weights = weight_rng.uniform(low=[0.25, 0.5, 0.5], high=[1, 1.5, 1.5], size=3)
    class_weights_list.append(_weights)

#print(class_weights_list)

In [10]:
def createDir(path: str):
    """Make sure the directory `path` exists and print whether it had to be created.

    Missing parent directories are created as well.
    """
    if os.path.exists(path):
        print('"'+ path + '" directory already existed')
        return

    os.makedirs(path)
    print('Created "' + path + '" directory')

# Output folder for the confusion-matrix PDFs saved by the weight test below
createDir('test_of_W')

"test_of_W" directory already existed


## Testing all weights

In [None]:
# Train one freshly initialised model per candidate class-weight vector, evaluate it on
# the test set and save a normalised confusion matrix as
# 'test_of_W/<run index>_<test accuracy>.pdf'.

# ------------- settings shared by every run (loop-invariant) --------------
signal_size = time_data_amount
BATCH_SIZE = 4096
EPOCH = 200
class_names = ['Empty channel', 'Wi-Fi', 'Bluetooth']
Y_test = np.argmax(labels_test, axis=1) # Convert one-hot to index

idz = 0
for weights_under_test in class_weights_list:
    # class_weight is passed to fit() as {class index: weight}
    w_dict = dict(enumerate(weights_under_test, 0))

    # ------------- model definition --------------
    y = layers.Input(shape=(signal_size,1), dtype='float32', name='Input')

    x = layers.Conv1D(16, 6, padding='same', activation='relu', use_bias=True)(y)
    #x = layers.Dropout(rate=0.1)(x)

    x = layers.Conv1D(16, 3, padding='valid', activation='relu')(x)

    # Disabled experiment (previously kept in a string literal):
    #x = layers.MaxPool1D(pool_size=3,strides=1)(x)
    #x = layers.Dropout(rate=0.1)(x)
    #x = layers.Conv1D(12, 3, padding='same', activation='relu')(x)
    #x = layers.Dropout(rate=0.1)(x)
    #x = layers.Conv1D(12, 3, padding='same', activation='relu')(x)
    #x = layers.MaxPool1D(pool_size=3,strides=1)(x)

    x = layers.Flatten()(x)
    x = layers.Dropout(rate=0.05)(x)
    x = layers.Dense(32, activation='relu')(x)
    x = layers.Dropout(rate=0.2)(x)
    x = layers.Dense(16,activation='relu')(x)
    p = layers.Dense(classes, activation='softmax', name='p')(x)

    model = Model(inputs=[y], outputs=[p])
    #model.summary()

    # ------------- model compilation --------------
    ourAdam = Adam()
    model.compile(optimizer=ourAdam, loss='categorical_crossentropy', metrics=['accuracy'])

    # Set the model training parameters
    # Stop training once the validation loss has not improved for 15 epochs and
    # roll back to the best weights seen
    callbacks_list = [callbacks.EarlyStopping(
                            monitor='val_loss',
                            patience=15,
                            verbose=0,
                            mode='auto',
                            restore_best_weights=True,
                        )
                                ]

    # ------------- Starting model Training --------------
    hist = model.fit(data_train,labels_train,
            batch_size = BATCH_SIZE,
            epochs = EPOCH,
            verbose = 0,
            callbacks= callbacks_list,
            validation_split=0.25,
            class_weight=w_dict)

    # evaluate() returns [loss, accuracy] because of metrics=['accuracy'] above
    evalDict = model.evaluate(data_test,labels_test)

    y_pred = np.argmax(model.predict(data_test),axis=1)
    class_report = classification_report(Y_test, y_pred, target_names=class_names)

    # Draw on an explicitly created axes: from_predictions() opens a figure of its own
    # when no axes is given, which left the empty plt.figure() of every run open.
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_predictions(Y_test, y_pred, normalize='true',cmap='Greens',colorbar=False, display_labels=class_names, ax=ax)
    try:
        # Best effort: a candidate that is not a numpy array cannot be formatted here,
        # but the figure is still saved without a title
        ax.set_title(np.array2string(weights_under_test, precision=3, separator=','))
    except Exception as gs:
        print(gs)
    fig.savefig('test_of_W/' + str(idz) + '_' + str(evalDict[1]) +'.pdf', format='pdf')
    plt.close(fig)
    idz += 1

2023-05-09 08:07:27.501693: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-05-09 08:07:27.505570: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-05-09 08:07:27.505667: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

