In [8]:
# Import modules from Scikit-learn
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split   # Import train_test_split function
from sklearn import metrics   # import metrics modules for accuracy calculation
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import pandas as pd
from sklearn.utils import shuffle

In [9]:
# Relative path of the identification dataset (CSV).
PATH = "../../my_data/identification-dataset/my_custom_data/big-identification-dataset.csv"

# Load the CSV and discard every row containing a NaN in one chained
# expression, so the frame is never mutated in place.
# No explicit shuffle is applied to the frame at this stage.
df = pd.read_csv(PATH).dropna()

In [10]:
# Draw nine random subsets of the cleaned frame, from 10% up to 90% of its rows.
# Every draw uses the same seed (200).
SUBSET_FRACTIONS = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
subsets = [df.sample(frac=fraction, random_state=200) for fraction in SUBSET_FRACTIONS]

# Keep the per-fraction names bound as well, for cells that use a single subset.
(df_10p, df_20p, df_30p, df_40p, df_50p,
 df_60p, df_70p, df_80p, df_90p) = subsets

In [11]:
# Show the element count (rows x columns, i.e. DataFrame.size) of each subset, one per line.
print(*(subset.size for subset in subsets), sep="\n")

95792
191592
287384
383184
478976
574768
670568
766360
862160


In [12]:
# Model pipeline: standardise the features, then classify with an MLP.
mlp_classifier = MLPClassifier(
    hidden_layer_sizes=(100, 100, 100),  # three hidden layers of 100 neurons each
    solver='adam',
    max_iter=500,                        # TODO: tune it later
    random_state=0,
)
pipe_MLP = make_pipeline(StandardScaler(), mlp_classifier)

In [13]:
# Columns used as model inputs, and the column holding the class label.
FEATURE_COLUMNS = ['tdoa', 'snr_an', 'power_dif', 'rx_snr', 'rx_powerdif', 'tof']
LABEL_COLUMN = 'NLOS_material'

# Both the feature matrix and the labels come from the 10% subset.
X = df_10p[FEATURE_COLUMNS]
y = df_10p[LABEL_COLUMN]

In [14]:
# Split the dataset into a training set (70%) and a test set (30%).
# random_state pins the shuffle performed by train_test_split so that the
# split -- and therefore the accuracy reported further down -- is the same
# on every Restart & Run All. The value 0 matches the seed given to the
# MLPClassifier in the pipeline cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

In [15]:
# Fit every step of the pipeline (scaler, then classifier) on the training split.
pipe_MLP.fit(X=X_train, y=y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('mlpclassifier',
                 MLPClassifier(hidden_layer_sizes=(100, 100, 100), max_iter=500,
                               random_state=0))])

In [16]:
# Predict a label for every sample of the held-out test split.
y_pred = pipe_MLP.predict(X=X_test)

In [17]:
# Calculate the accuracy of the test-set predictions and report it as a percentage.
test_accuracy = metrics.accuracy_score(y_test, y_pred)
print(f'Test Accuracy: {test_accuracy * 100}%')

Test Accuracy: 86.33453938213192%
