## Notebook for machine learning methods for RF-based detection & classification
- Date: June 27, 2022
- Currently explored: SVM and logistic regression with PSD features
- To-dos:
    - Use spectrogram features

In [1]:
import os
import numpy as np
from sklearn.model_selection import KFold
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression

from helper_functions import *
from latency_helpers import *

### Load Features

In [36]:
# Location of the pre-computed feature files and the feature set to load.
feat_folder = '../Features/'
feat_name = 'PSD'
datestr = '2022-07-01'  # presumably the date stamp in the feature file names -- confirm in load_features_arr

# Extraction settings forwarded to the loader.
seg_len = 20
n_per_seg = 512

# Interference conditions whose features are loaded.
interferences = ['WIFI', 'BLUE', 'BOTH', 'CLEAN']

# Load the feature matrix (Xs_arr) and the labels (y_arr) in one call.
Xs_arr, y_arr = load_features_arr(
    feat_folder,
    feat_name,
    seg_len,
    datestr,
    n_per_seg,
    interferences,
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 78/78 [00:00<00:00, 151.79it/s]


In [37]:
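# Legacy manual loader for the per-interference PSD .npy files (kept for reference only;
# the cell above loads the features through load_features_arr instead).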
# feat_folder = '../Features/'
# interferences = ['BLUE', 'WIFI']
# # ,'BOTH', 'CLEAN']
# n_per_seg = 256
# datestr = '2022-06-28'

# Xs_arr =np.empty([0,n_per_seg//2+1])
# y_arr = np.empty([0,1])
# for itf in interferences:
#     feat_file_name = itf+'_PSD_'+str(n_per_seg)+'_'+datestr+'.npy'
#     DATA = np.load(feat_folder+feat_file_name, allow_pickle=True).item()
#     # PSD_1024_2022-06-27.npy
#     Xs_arr = np.vstack((Xs_arr, DATA['feat']))
#     y_arr = np.vstack((y_arr, DATA['drones'].reshape(len(DATA['drones']),1)))

In [38]:
## Apply normalization
# Scale every feature row by its own maximum so that each row peaks at 1.
#
# Work on a float copy: the previous `X_norm = Xs_arr` only aliased the raw
# array, so the row-by-row loop silently overwrote Xs_arr as well.
# NOTE: Xs_arr now stays un-normalized; any cell that wants the scaled
# features must read X_norm.
X_norm = np.array(Xs_arr, dtype=float)

# Row-wise maxima, kept as a column so the division broadcasts over each row.
row_max = X_norm.max(axis=1, keepdims=True)
# An all-zero row would divide by zero; dividing by 1 leaves it as zeros.
row_max[row_max == 0] = 1.0
X_norm /= row_max

# Flatten the labels to one dimension (one label per feature row).
y_arr = y_arr.reshape(len(y_arr),)

# Last expression of the cell, so the shape is actually displayed.
X_norm.shape

In [40]:
# Feature matrix consumed by the model-fitting cells; bound to the normalized features.
Xs_use = X_norm
np.shape(Xs_use)

(37978, 257)

## Train Test split

In [41]:
# Cross-validation splitter: shuffled K-fold with a fixed seed.
k_fold = 10
cv = KFold(n_splits=k_fold, shuffle=True, random_state=1)

# Hyper-parameter candidates: powers of two from 2**-10 up to 2**9.
exponents = range(-10, 10)
Cs = [2 ** e for e in exponents]
gammas = [2 ** e for e in exponents]

## SVM

In [None]:
# Nested cross-validation for an RBF-kernel SVM: for every outer fold produced
# by `cv`, grid-search C and gamma on the training part, then score the
# selected model on the held-out part.
parameters = {'C':Cs, 'gamma':gammas}
best_params_ls, acc_ls, f1_ls, runt_ls = [], [], [], []

for train_ix, test_ix in cv.split(Xs_use):
    X_train, y_train = Xs_use[train_ix], y_arr[train_ix]
    X_test, y_test = Xs_use[test_ix], y_arr[test_ix]

    # find the optimal hyper-parameters on the training fold
    svc = svm.SVC(kernel='rbf')
    clf = GridSearchCV(svc, parameters, n_jobs=1)
    clf.fit(X_train, y_train)

    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)

    # Predict on the held-out fold through the benchmarking helper, which
    # returns the predictions together with the measured runtimes.
    # (Untimed alternative: y_pred = clf.predict(Xs_use[test_ix]))
    y_pred, runtimes = atomic_benchmark_estimator(clf, X_test, verbose=False)
    runt_ls.append(np.mean(runtimes))

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    print('Accuracy: {:.3},\t F1: {:.3}'.format(acc,f1))
    acc_ls.append(acc)
    f1_ls.append(f1)

# Average over the outer folds. The runtime is multiplied by 1e3 and labelled
# "ms", so the helper presumably reports seconds -- TODO confirm.
summary_fmt = ': SVM K-fold average test acc: {:.2}, F1: {:.2}, Run-time: {:.2}ms'
out_msg = feat_name + summary_fmt.format(np.mean(acc_ls), np.mean(f1_ls), np.mean(runt_ls)*1e3)
print(out_msg)

In [32]:
# Re-print the SVM summary from the per-fold lists filled by the SVM cell.
mean_acc = np.mean(acc_ls)
mean_f1 = np.mean(f1_ls)
mean_runtime_ms = np.mean(runt_ls)*1e3
out_msg = feat_name+': SVM K-fold average test acc: {:.2}, F1: {:.2}, Run-time: {:.2}ms'.format(mean_acc, mean_f1, mean_runtime_ms)
print(out_msg)

PSD: SVM K-fold average test acc: 0.99, F1: 0.99, Run-time: 0.17ms


In [None]:
# Support-vector indices of the refitted best SVC from the most recent grid search.
# The GridSearchCV attribute is `best_estimator_`; the earlier spelling
# `best_estimater` does not exist and raised AttributeError.
# Run this right after the SVM cell: `clf` must still be the SVM search,
# because only SVC estimators expose `support_`.
clf.best_estimator_.support_

## Logistic Regression

In [None]:
# Nested cross-validation for logistic regression: for every outer fold
# produced by `cv`, grid-search C on the training part, then measure accuracy
# on the held-out part.
# NOTE: this rebinds best_params_ls and clf, replacing the values left by the SVM cell.
best_params_ls = []
score_ls = []

parameters = {'C':[0.01,0.1,1,10,100,1000,10000]}

# Read Xs_use, the same feature matrix the SVM section trains on, so both
# models are evaluated on identical inputs.
for train_ix, test_ix in cv.split(Xs_use):

    # find the optimal hyper-parameters on the training fold
    lr = LogisticRegression(max_iter=1000000)
    clf = GridSearchCV(lr, parameters, n_jobs=1)
    clf.fit(Xs_use[train_ix], y_arr[train_ix])

    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)

    # predict on the test data
    y_pred = clf.predict(Xs_use[test_ix])
    acc = accuracy_score(y_arr[test_ix], y_pred)
    print(acc)
    score_ls.append(acc)

# `feat_file_name` is only assigned inside the commented-out legacy loader, so
# using it here raised NameError; `feat_name` is the label the SVM summary uses.
print(feat_name+': LR K-fold average test score:', np.mean(score_ls))