## Notebook for machine learning methods for rf-based detection & classification
-  exploring, SVM, Logistic regression with PSD

In [1]:
import os
import numpy as np
from sklearn.model_selection import KFold
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression

from helper_functions import *
from latency_helpers import *
from loading_functions import *

# DroneDetect Dataset

### Load Features

In [2]:
feat_folder = '../Features/'
feat_name = 'PSD'
seg_len = 20
# datestr = '2022-07-05'
n_per_seg = 1024
interferences = ['WIFI', 'BLUE', 'BOTH', 'CLEAN']
output_name = 'drones'
feat_format = 'ARR'

dataset = DroneDetectTorch(feat_folder, feat_name, seg_len, n_per_seg, feat_format,
                                output_name, interferences)

# Xs_arr, y_arr = load_dronedetect_features(feat_folder, feat_name, seg_len, 
#                                           n_per_seg, output_feat,interferences)

Directory Name:  ../Features/ARR_PSD_1024_20/


In [3]:
print('dataset size', len(dataset))
print('shape of each item', dataset.__getitem__([1,0,38977])[0].shape)

dataset size 38978
shape of each item (3, 1024)


In [4]:
tx, ty = dataset.__getitem__(1)

In [5]:
ty

'AIR'

In [6]:
i_all = range(len(dataset))
X_use, y_use = dataset.__getitem__(i_all)

In [7]:
y_use

array(['AIR', 'AIR', 'AIR', ..., 'DIS', 'DIS', 'DIS'], dtype='<U3')

## Train Test split

In [8]:
# split data into K-fold
k_fold = 10
kf = KFold(n_splits=k_fold, random_state=1, shuffle=True)

# model parameters
Cs=list(map(lambda x:pow(2,x),range(-3,10,2)))
gammas=list(map(lambda x:pow(2,x),range(-3,10,2)))

In [9]:
clf.fit(X_use[train_ix], y_use[train_ix],error_score='raise')

NameError: name 'clf' is not defined

## SVM

In [None]:
best_params_ls = []
acc_ls = []
f1_ls = []
runt_ls = []
parameters = {'C':Cs, 'gamma':gammas}
for train_ix, test_ix in kf.split(X_use):
    
    # find the optimal hypber parameters
    svc = svm.SVC(kernel='rbf')
    clf = GridSearchCV(svc, parameters, n_jobs=1)
    clf.fit(X_use[train_ix], y_use[train_ix])
    
    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)
    
    # predict on the test data
#     y_pred = clf.predict(Xs_use[test_ix])
    y_pred, runtimes = atomic_benchmark_estimator(clf, X_use[test_ix], '<U3', verbose=False) # predict & measure time
    runt_ls.append(np.mean(runtimes))
    
    acc = accuracy_score(y_use[test_ix], y_pred)
    f1 = f1_score(y_use[test_ix], y_pred, average='weighted')
    print('Accuracy: {:.3},\t F1: {:.3}'.format(acc,f1))
    acc_ls.append(acc)
    f1_ls.append(f1)

# print(feat_name+': SVM K-fold average test acc:', np.mean(acc_ls), 'F1:', np.mean(f1_ls), 'Run-time:', np.mean(runt_ls)*1e3,'ms')
out_msg = feat_name+': SVM K-fold average test acc: {:.3}, F1: {:.3}, Run-time: {:.3}ms'.format(np.mean(acc_ls), np.mean(f1_ls), np.mean(runt_ls)*1e3)
print(out_msg)

In [None]:
# print out the size of the support vectors
# clf.best_estimator_.support_vectors_

In [45]:
Cs

[0.125, 0.5, 2, 8, 32, 128, 512]

## SVM with fixed hyperparameters

In [8]:
acc_ls = []
f1_ls = []
runt_ls = []

k_fold = 5
cv = KFold(n_splits=k_fold, random_state=1, shuffle=True)

for train_ix, test_ix in cv.split(Xs_use):
    svc = svm.SVC(kernel='rbf', C=512, gamma = 0.5)
    svc.fit(Xs_use[train_ix], y_arr[train_ix])
    # predict on the test data
    y_pred, runtimes = atomic_benchmark_estimator(svc, Xs_use[test_ix], output_type='<U3', verbose=False)
    runt_ls.append(np.mean(runtimes))
    
    acc = accuracy_score(y_arr[test_ix], y_pred)
    f1 = f1_score(y_arr[test_ix], y_pred, average='weighted')
    print('Accuracy: {:.3},\t F1: {:.3}'.format(acc,f1))
    acc_ls.append(acc)
    f1_ls.append(f1)

out_msg = feat_name+': SVM K-fold average test acc: {:.2}, F1: {:.2}, Run-time: {:.2}ms'.format(np.mean(acc_ls), np.mean(f1_ls), np.mean(runt_ls)*1e3)
print(out_msg)

Accuracy: 0.798,	 F1: 0.807
Accuracy: 0.796,	 F1: 0.804
Accuracy: 0.792,	 F1: 0.8
Accuracy: 0.792,	 F1: 0.8
Accuracy: 0.788,	 F1: 0.797
PSD: SVM K-fold average test acc: 0.79, F1: 0.8, Run-time: 1.4e+01ms


In [16]:
svc.predict(Xs_use[test_ix
                  ])

array(['INS', 'INS', 'INS', ..., 'MIN', 'MIN', 'MIN'], dtype='<U3')

In [30]:
print(svc.support_vectors_.shape)

(3126, 513)


## Logistic Regression

In [None]:
best_params_ls = []
score_ls = []

parameters = {'C':[0.01,0.1,1,10,100,1000,10000]}

for train_ix, test_ix in cv.split(Xs_arr):
    
    # find the optimal hypber parameters
    lr = LogisticRegression(max_iter=1000000)
    clf = GridSearchCV(lr, parameters, n_jobs=1)
    clf.fit(Xs_arr[train_ix], y_arr[train_ix])
    
    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)
    
    # predict on the test data
    y_pred = clf.predict(Xs_arr[test_ix])
    acc = accuracy_score(y_arr[test_ix], y_pred)
    print(acc)
    score_ls.append(acc)
    
print(feat_file_name+': LR K-fold average test score:', np.mean(score_ls))

# Drone RF Dataset

In [3]:
# Load features
feat_folder = '../Features_DroneRF/'
feat_name = 'PSD'
highlow = 'H'
seg_len = 50
n_per_seg = 1024
Xs_arr, y_arr = load_dronerf_features(feat_folder, feat_name, seg_len, n_per_seg, highlow, 'bi')


## Apply normalization
X_norm = Xs_arr
for n in range(len(Xs_arr)):
    X_norm[n] = Xs_arr[n]/max(Xs_arr[n])
X_norm.shape
y_arr = y_arr.reshape(len(y_arr),)

Xs_use = X_norm # Use normalized features
Xs_use.shape


100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 364.05it/s]


(1130, 513)

In [6]:
best_params_ls = []
acc_ls = []
f1_ls = []
runt_ls = []
parameters = {'C':Cs, 'gamma':gammas}
for train_ix, test_ix in cv.split(Xs_use):
    
    # find the optimal hypber parameters
    svc = svm.SVC(kernel='rbf')
    clf = GridSearchCV(svc, parameters, n_jobs=1)
    clf.fit(Xs_use[train_ix], y_arr[train_ix])
    
    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)
    
    # predict on the test data
#     y_pred = clf.predict(Xs_use[test_ix])
    y_pred, runtimes = atomic_benchmark_estimator(clf, Xs_use[test_ix], 'int', verbose=False) # predict & measure time
    runt_ls.append(np.mean(runtimes))
    
    acc = accuracy_score(y_arr[test_ix], y_pred)
    f1 = f1_score(y_arr[test_ix], y_pred, average='weighted')
    print('Accuracy: {:.3},\t F1: {:.3}'.format(acc,f1))
    acc_ls.append(acc)
    f1_ls.append(f1)

# print(feat_name+': SVM K-fold average test acc:', np.mean(acc_ls), 'F1:', np.mean(f1_ls), 'Run-time:', np.mean(runt_ls)*1e3,'ms')
out_msg = feat_name+': SVM K-fold average test acc: {:.2}, F1: {:.2}, Run-time: {:.2}ms'.format(np.mean(acc_ls), np.mean(f1_ls), np.mean(runt_ls)*1e3)
print(out_msg)

{'C': 32, 'gamma': 0.5}
Accuracy: 0.991,	 F1: 0.991
{'C': 128, 'gamma': 0.5}
Accuracy: 1.0,	 F1: 1.0
{'C': 128, 'gamma': 0.125}
Accuracy: 1.0,	 F1: 1.0
{'C': 32, 'gamma': 0.5}
Accuracy: 0.991,	 F1: 0.991
{'C': 128, 'gamma': 0.125}
Accuracy: 1.0,	 F1: 1.0
{'C': 128, 'gamma': 0.125}
Accuracy: 0.991,	 F1: 0.991
{'C': 128, 'gamma': 0.125}
Accuracy: 0.991,	 F1: 0.991
{'C': 128, 'gamma': 0.125}
Accuracy: 1.0,	 F1: 1.0
{'C': 512, 'gamma': 0.125}
Accuracy: 1.0,	 F1: 1.0
{'C': 128, 'gamma': 0.125}
Accuracy: 1.0,	 F1: 1.0
PSD: SVM K-fold average test acc: 1.0, F1: 1.0, Run-time: 0.12ms


## Try Model on Gamut data

In [2]:
data_path = '/home/kzhou/Data/S3/leesburg_worker1/Features/'
Xgamut = load_gamut_features(data_path, 'psd')

100%|███████████████████████████████████████| 115/115 [00:00<00:00, 2255.41it/s]


In [7]:
# normalize data
## Apply normalization
X_gamut_norm = Xgamut
for n in range(len(Xgamut)):
    X_gamut_norm[n] = Xgamut[n]/max(Xgamut[n])

X_gamut_norm.max()

1.0

In [8]:
# Feed data through trained SVM model
y_gamut_pred = clf.predict(X_gamut_norm)


In [9]:
y_gamut_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1,

## Save trained model for testing

In [None]:
bestmodel = clf.best_estimator_

In [None]:
filename = 'test_save_model.sav'
pickle.dump(bestmodel, open(filename, 'wb'))