## Notebook for machine learning methods for RF-based detection & classification
- Exploring SVM and logistic regression with PSD features

In [1]:
import os
import numpy as np
from sklearn.model_selection import KFold
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression

from helper_functions import *
from latency_helpers import *

### Load Features

In [23]:
# Settings that select which pre-computed feature set is loaded.
feat_folder = '../Features/'  # relative path of the feature directory
feat_name = 'PSD'  # feature type; also used as the prefix of the result messages printed below
seg_len = 100  # NOTE(review): presumably the segment length used at feature extraction; units not visible here - confirm in helper_functions
# datestr = '2022-07-05'  (unused: not passed to load_features_arr)
n_per_seg = 256  # NOTE(review): presumably samples per PSD segment (256 -> 129 one-sided bins would match the 129 feature columns) - confirm
interferences = ['WIFI', 'BLUE', 'BOTH', 'CLEAN']  # interference conditions handed to the loader
# load_features_arr is provided by one of the star imports at the top of the
# notebook; its two return values are used below as the feature array and the labels.
Xs_arr, y_arr = load_features_arr(feat_folder, feat_name, seg_len, n_per_seg, interferences)

100%|█████████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 924.07it/s]


In [29]:
## Apply normalization
# Scale every feature vector (row of Xs_arr) by that row's own maximum.
# The division allocates a new array, so Xs_arr keeps the raw features. A plain
# `X_norm = Xs_arr` would only alias the array, and in-place row updates would
# then overwrite the raw data as well.
# NOTE(review): a row whose maximum is 0 would produce NaN/inf here - this
# assumes every PSD row has a non-zero maximum; confirm for the data set.
row_max = Xs_arr.max(axis=1, keepdims=True)  # shape (n_samples, 1), broadcasts over the columns
X_norm = Xs_arr / row_max

# Flatten the labels to a 1-D array with one entry per sample, which is the
# shape the scikit-learn estimators and metrics below are given.
y_arr = y_arr.reshape(len(y_arr),)

In [30]:
# Feature matrix consumed by the SVM cells below; bound to the normalized features.
Xs_use = X_norm # Use normalized features
Xs_use.shape  # last expression of the cell: displays the array shape

(7795, 129)

## Train/test split (K-fold cross-validation) and hyperparameter grid

In [27]:
# K-fold cross-validation splitter shared by all model sections below.
# Shuffling with a fixed random_state keeps the fold assignment reproducible
# from run to run.
k_fold = 10
cv = KFold(n_splits=k_fold, random_state=1, shuffle=True)

# Hyperparameter grid for the RBF SVM: powers of two from 2^-5 up to 2^4,
# used for both the regularization strength C and the kernel width gamma.
Cs = [pow(2, exponent) for exponent in range(-5, 5)]
gammas = [pow(2, exponent) for exponent in range(-5, 5)]

## SVM

In [28]:
# Nested cross-validation for the RBF-kernel SVM.
# Outer loop: the folds produced by `cv`. Inner step: a grid search over C and
# gamma fitted on the training fold only, then scored on the held-out fold.
parameters = {'C': Cs, 'gamma': gammas}
best_params_ls, acc_ls, f1_ls, runt_ls = [], [], [], []

for train_ix, test_ix in cv.split(Xs_use):
    X_train, y_train = Xs_use[train_ix], y_arr[train_ix]
    X_test, y_test = Xs_use[test_ix], y_arr[test_ix]

    # find the optimal hyperparameters on the training fold
    svc = svm.SVC(kernel='rbf')
    clf = GridSearchCV(svc, parameters, n_jobs=1)
    clf.fit(X_train, y_train)

    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)

    # Predict on the held-out fold and collect timing measurements.
    # atomic_benchmark_estimator comes from the star imports; its mean is later
    # multiplied by 1e3 and reported as ms, so the timings are presumably in
    # seconds - confirm in latency_helpers.
    y_pred, runtimes = atomic_benchmark_estimator(clf, X_test, verbose=False)
    runt_ls.append(np.mean(runtimes))

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    print('Accuracy: {:.3},\t F1: {:.3}'.format(acc, f1))
    acc_ls.append(acc)
    f1_ls.append(f1)

# Summary over all folds: mean accuracy, mean weighted F1, mean run-time.
mean_acc = np.mean(acc_ls)
mean_f1 = np.mean(f1_ls)
mean_runtime_ms = np.mean(runt_ls)*1e3
out_msg = feat_name+': SVM K-fold average test acc: {:.2}, F1: {:.2}, Run-time: {:.2}ms'.format(mean_acc, mean_f1, mean_runtime_ms)
print(out_msg)

{'C': 16, 'gamma': 0.5}
Accuracy: 0.964,	 F1: 0.964
{'C': 16, 'gamma': 0.5}
Accuracy: 0.965,	 F1: 0.965
{'C': 16, 'gamma': 0.5}
Accuracy: 0.977,	 F1: 0.977
{'C': 16, 'gamma': 0.5}
Accuracy: 0.971,	 F1: 0.97
{'C': 16, 'gamma': 0.5}
Accuracy: 0.979,	 F1: 0.98
{'C': 16, 'gamma': 0.5}
Accuracy: 0.968,	 F1: 0.968
{'C': 16, 'gamma': 0.5}
Accuracy: 0.968,	 F1: 0.968
{'C': 16, 'gamma': 0.5}
Accuracy: 0.969,	 F1: 0.969
{'C': 16, 'gamma': 0.5}
Accuracy: 0.981,	 F1: 0.981
{'C': 16, 'gamma': 1}
Accuracy: 0.968,	 F1: 0.968
PSD: SVM K-fold average test acc: 0.97, F1: 0.97, Run-time: 0.4ms


In [None]:
# print out the size of the support vectors
# clf.best_estimator_.support_vectors_

## SVM with fixed hyperparameters

In [7]:
# K-fold evaluation of an RBF SVM with fixed hyperparameters (no grid search).
# NOTE(review): C=512 (2^9) and gamma=0.0078125 (2^-7) both lie outside the
# 2^-5..2^4 grid defined earlier in this notebook - confirm they are still the
# intended values for the current feature set.
acc_ls, f1_ls, runt_ls = [], [], []

for train_ix, test_ix in cv.split(Xs_use):
    X_test, y_test = Xs_use[test_ix], y_arr[test_ix]

    svc = svm.SVC(kernel='rbf', C=512, gamma = 0.0078125)
    svc.fit(Xs_use[train_ix], y_arr[train_ix])

    # Predict on the held-out fold and collect timing measurements
    # (helper provided by the star imports).
    y_pred, runtimes = atomic_benchmark_estimator(svc, X_test, verbose=False)
    runt_ls.append(np.mean(runtimes))

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    print('Accuracy: {:.3},\t F1: {:.3}'.format(acc, f1))
    acc_ls.append(acc)
    f1_ls.append(f1)

# Summary over all folds: mean accuracy, mean weighted F1, mean run-time.
mean_acc = np.mean(acc_ls)
mean_f1 = np.mean(f1_ls)
mean_runtime_ms = np.mean(runt_ls)*1e3
out_msg = feat_name+': SVM K-fold average test acc: {:.2}, F1: {:.2}, Run-time: {:.2}ms'.format(mean_acc, mean_f1, mean_runtime_ms)
print(out_msg)

Accuracy: 0.958,	 F1: 0.958
Accuracy: 0.96,	 F1: 0.96
Accuracy: 0.954,	 F1: 0.954
Accuracy: 0.953,	 F1: 0.953
Accuracy: 0.953,	 F1: 0.953
Accuracy: 0.951,	 F1: 0.951
Accuracy: 0.95,	 F1: 0.95
Accuracy: 0.946,	 F1: 0.946
Accuracy: 0.952,	 F1: 0.952
Accuracy: 0.956,	 F1: 0.956
PSD: SVM K-fold average test acc: 0.95, F1: 0.95, Run-time: 1.5ms


In [8]:
# Shape of the support-vector array of `svc`. When the notebook is run top to
# bottom, `svc` is the model fitted on the last fold of the loop above.
print(svc.support_vectors_.shape)

(6254, 257)


## Logistic Regression

In [None]:
# Nested cross-validation for logistic regression.
# Outer loop: the folds produced by `cv`. Inner step: a grid search over the
# regularization parameter C fitted on the training fold only, then scored
# (accuracy) on the held-out fold.
best_params_ls = []
score_ls = []

parameters = {'C':[0.01,0.1,1,10,100,1000,10000]}

# Xs_use is the feature matrix selected for the model cells of this notebook;
# using it here keeps this section consistent with the SVM sections.
for train_ix, test_ix in cv.split(Xs_use):

    # find the optimal hyperparameters on the training fold
    lr = LogisticRegression(max_iter=1000000)
    clf = GridSearchCV(lr, parameters, n_jobs=1)
    clf.fit(Xs_use[train_ix], y_arr[train_ix])

    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)

    # predict on the test data
    y_pred = clf.predict(Xs_use[test_ix])
    acc = accuracy_score(y_arr[test_ix], y_pred)
    print(acc)
    score_ls.append(acc)

# feat_name is the label this notebook defines for the feature set (the SVM
# summaries use it too); `feat_file_name` is not defined anywhere in the notebook.
print(feat_name+': LR K-fold average test score:', np.mean(score_ls))