In [13]:
# Import modules from Scikit-learn
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split   # Import train_test_split function
from sklearn import metrics   # import metrics modules for accuracy calculation
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import pandas as pd
from sklearn.utils import shuffle
from tqdm import tqdm

# Classification with Reduced Data

In this experiment, the training will be conducted on 10%, 205, ... , 100% of the data. On each data subset, the model will be trained 10x. <br>
For each subset, the mean accuracy will be calculated as well as the standard deviation of the accuracy. 

In [14]:
# Create a pipeline object for the model
pipe_MLP = make_pipeline(StandardScaler(),
                         MLPClassifier(solver='adam',
                                       hidden_layer_sizes=(100,100,100,100), # 3 hidden layers with (100x100x100) neurons
                                       random_state=0,
                                       max_iter=500
                                       )
                         )

In [15]:
# read the data
PATH = "../../my_data/identification-dataset/my_custom_data/big-identification-dataset.csv"
df = pd.read_csv(PATH)

# shuffle the data
# df = shuffle(df)

# drop NaN values
df.dropna(inplace=True)

In [16]:
# split the data subsets
df_10p = df.sample(frac = 0.1, random_state = 200)
df_20p = df.sample(frac = 0.2, random_state = 200)
df_30p = df.sample(frac = 0.3, random_state = 200)
df_40p = df.sample(frac = 0.4, random_state = 200)
df_50p = df.sample(frac = 0.5, random_state = 200)
df_60p = df.sample(frac = 0.6, random_state = 200)
df_70p = df.sample(frac = 0.7, random_state = 200)
df_80p = df.sample(frac = 0.8, random_state = 200)
df_90p = df.sample(frac = 0.9, random_state = 200)

subsets = [df_10p, df_20p, df_30p, df_40p, df_50p, df_60p, df_70p, df_80p, df_90p]

In [17]:
train_iter = range(10)

all_acc = []
for d in subsets:
    
    # Extracted features
    X = d[['tdoa', 'snr_an', 'power_dif', 'rx_snr', 'rx_powerdif', 'tof']]
    y = d['NLOS_material'] # Labels

    # Split dataset into training and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3) # 70% training and 30% test

    train_acc = [] # stores all accuracies on a subset
    for i in tqdm(train_iter):
        # Train the whole pipeline
        pipe_MLP.fit(X_train, y_train)

        # Make predictions on the test data
        y_pred = pipe_MLP.predict(X_test)

        # store acc
        train_acc.append(metrics.accuracy_score(y_test, y_pred))
    
    # store the accuracy of all training iteration
    all_acc.append(train_acc)

100%|██████████| 10/10 [05:08<00:00, 30.83s/it]
100%|██████████| 10/10 [19:26<00:00, 116.67s/it]
100%|██████████| 10/10 [17:24<00:00, 104.44s/it]
100%|██████████| 10/10 [32:45<00:00, 196.55s/it]
100%|██████████| 10/10 [55:21<00:00, 332.11s/it]
100%|██████████| 10/10 [56:43<00:00, 340.39s/it]
100%|██████████| 10/10 [1:27:37<00:00, 525.77s/it]
100%|██████████| 10/10 [1:45:58<00:00, 635.88s/it]
100%|██████████| 10/10 [1:44:47<00:00, 628.72s/it]


In [18]:
import numpy as np

In [19]:
acc_m =np.array([np.array(xi) for xi in all_acc])

In [20]:
acc_m.shape

(9, 10)

In [21]:
acc_m

array([[0.84052324, 0.84052324, 0.84052324, 0.84052324, 0.84052324,
        0.84052324, 0.84052324, 0.84052324, 0.84052324, 0.84052324],
       [0.85511482, 0.85511482, 0.85511482, 0.85511482, 0.85511482,
        0.85511482, 0.85511482, 0.85511482, 0.85511482, 0.85511482],
       [0.85960843, 0.85960843, 0.85960843, 0.85960843, 0.85960843,
        0.85960843, 0.85960843, 0.85960843, 0.85960843, 0.85960843],
       [0.86485734, 0.86485734, 0.86485734, 0.86485734, 0.86485734,
        0.86485734, 0.86485734, 0.86485734, 0.86485734, 0.86485734],
       [0.86315555, 0.86315555, 0.86315555, 0.86315555, 0.86315555,
        0.86315555, 0.86315555, 0.86315555, 0.86315555, 0.86315555],
       [0.86758838, 0.86758838, 0.86758838, 0.86758838, 0.86758838,
        0.86758838, 0.86758838, 0.86758838, 0.86758838, 0.86758838],
       [0.86566986, 0.86566986, 0.86566986, 0.86566986, 0.86566986,
        0.86566986, 0.86566986, 0.86566986, 0.86566986, 0.86566986],
       [0.86474825, 0.86474825, 0.8647482