In [43]:
import pandas as pd

# Baseline MLP Multiclass Classification Experiment

In [44]:
# Relative path to the identification dataset (CSV file).
PATH = "../../my_data/identification-dataset/my_custom_data/big-identification-dataset.csv"

# Load the whole CSV into a DataFrame using pandas' default parsing options.
df = pd.read_csv(filepath_or_buffer=PATH)

In [45]:
# Print a concise summary of the frame: number of rows, each column's dtype
# and non-null count, and the memory footprint. Any column whose non-null
# count is lower than the number of entries contains missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 160260 entries, 0 to 160259
Data columns (total 8 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   tdoa           160260 non-null  float64
 1   snr_an         160260 non-null  float64
 2   power_dif      160213 non-null  float64
 3   rx_snr         119746 non-null  float64
 4   rx_powerdif    119744 non-null  float64
 5   tof            119746 non-null  float64
 6   material       160260 non-null  object 
 7   NLOS_material  160260 non-null  int64  
dtypes: float64(6), int64(1), object(1)
memory usage: 9.8+ MB


In [46]:
# Discard every row that has a missing value in at least one column.
# The result is rebound to `df` rather than using `inplace=True`: in-place
# mutation gives no performance benefit, prevents method chaining and is
# discouraged by pandas.
# NOTE(review): going by the info()/describe() outputs in this notebook this
# removes roughly a quarter of the rows (160260 -> 119744) -- confirm that
# dropping, rather than imputing, is intended.
df = df.dropna()

In [47]:
# Preview the first rows of the frame (head() shows five rows by default)
# as a quick sanity check of the column values.
df.head()

Unnamed: 0,tdoa,snr_an,power_dif,rx_snr,rx_powerdif,tof,material,NLOS_material
0,-0.211129,145.678574,10.307022,215.300003,11.049919,5.009119,cardboard,1
1,-0.197054,134.321426,10.824654,192.0,11.397888,5.027886,cardboard,1
2,-0.197054,134.321426,10.824654,236.149994,9.84436,5.06542,cardboard,1
3,-0.248663,104.10714,11.064209,236.149994,9.84436,5.06542,cardboard,1
4,-0.182979,113.25,10.954155,185.0,10.330307,5.027886,cardboard,1


In [48]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the frame.
# NOTE(review): in the recorded output the minima of `tdoa` and `rx_powerdif`
# lie far outside their interquartile ranges -- possibly outliers; verify
# whether they should be filtered before training.
df.describe()

Unnamed: 0,tdoa,snr_an,power_dif,rx_snr,rx_powerdif,tof,NLOS_material
count,119744.0,119744.0,119744.0,119744.0,119744.0,119744.0,119744.0
mean,-0.009004,120.834692,12.863304,198.813199,11.032414,5.043572,1.750142
std,1.558409,39.876752,3.804028,31.19874,1.248188,0.022086,2.126004
min,-373.351807,6.208333,-17.027435,0.118671,-35.275032,4.952817,0.0
25%,-0.211129,109.71875,11.155712,177.791672,10.306816,5.027886,0.0
50%,-0.009384,127.75,12.243141,198.449997,10.761261,5.041961,1.0
75%,0.211129,144.041672,12.946575,222.399994,11.368462,5.056036,4.0
max,4.077143,326.200012,32.832855,330.0,36.156631,5.14518,6.0


In [49]:
# List the distinct values found in the `NLOS_material` column, which is used
# as the label column for classification.
df['NLOS_material'].unique()

array([1, 0, 2, 3, 4, 5, 6])

## Classification

In [50]:
# Import modules from Scikit-learn
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split   # Import train_test_split function
from sklearn import metrics   # import metrics modules for accuracy calculation
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [51]:
# Build the model pipeline: inputs are standardised first and then fed to a
# multi-layer perceptron classifier with four hidden layers of 100 units each.
pipe_MLP = make_pipeline(
    StandardScaler(),
    MLPClassifier(
        hidden_layer_sizes=(100, 100, 100, 100),
        solver='adam',
        max_iter=500,  # TODO: tune it later
        random_state=0,  # fixed seed for the classifier
        verbose=True,  # print the training loss at every iteration
    ),
)

In [52]:
# Feature matrix: the six measurement columns used as model inputs.
X = df.loc[:, ['tdoa', 'snr_an', 'power_dif', 'rx_snr', 'rx_powerdif', 'tof']]
# Target vector: the class label of each row.
y = df.loc[:, 'NLOS_material']

In [53]:
# Split the dataset into a training set (70%) and a test set (30%).
# `random_state` pins the shuffle so that the split -- and therefore the
# reported test accuracy -- is reproducible on a fresh kernel run; without it
# every execution evaluates the model on a different test set.
# NOTE(review): consider also passing `stratify=y` so that both subsets keep
# the same class proportions -- confirm every class has at least two samples.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

In [54]:
# Train the whole pipeline on the training split: each pipeline step is fitted
# in order, so the scaler is fitted on X_train and the classifier is trained
# on the scaled features. The per-iteration loss lines printed below come from
# the classifier's verbose mode.
pipe_MLP.fit(X_train, y_train)

Iteration 1, loss = 0.53642023
Iteration 2, loss = 0.33426130
Iteration 3, loss = 0.31675569
Iteration 4, loss = 0.30810004
Iteration 5, loss = 0.29959046
Iteration 6, loss = 0.29607551
Iteration 7, loss = 0.29626705
Iteration 8, loss = 0.29221354
Iteration 9, loss = 0.29028385
Iteration 10, loss = 0.28880795
Iteration 11, loss = 0.28735787
Iteration 12, loss = 0.28692276
Iteration 13, loss = 0.28505720
Iteration 14, loss = 0.28380460
Iteration 15, loss = 0.28363918
Iteration 16, loss = 0.28201049
Iteration 17, loss = 0.28037814
Iteration 18, loss = 0.28168484
Iteration 19, loss = 0.28271496
Iteration 20, loss = 0.28083537
Iteration 21, loss = 0.27779552
Iteration 22, loss = 0.27689673
Iteration 23, loss = 0.27733974
Iteration 24, loss = 0.27489947
Iteration 25, loss = 0.27524602
Iteration 26, loss = 0.27495216
Iteration 27, loss = 0.27400615
Iteration 28, loss = 0.27378021
Iteration 29, loss = 0.27374182
Iteration 30, loss = 0.27458695
Iteration 31, loss = 0.27201584
Iteration 32, los

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('mlpclassifier',
                 MLPClassifier(hidden_layer_sizes=(100, 100, 100, 100),
                               max_iter=500, random_state=0, verbose=True))])

In [57]:
# Predict a class label for every row of the held-out test features using the
# fitted pipeline (the same scaling is applied before classification).
y_pred = pipe_MLP.predict(X_test)

In [59]:
# Calculate the accuracy of the test-data predictions (fraction of labels
# predicted correctly) and report it as a percentage.
print(f'Test Accuracy: {metrics.accuracy_score(y_test, y_pred) * 100}%')

Test Accuracy: 86.70526667408974%
