## Notebook for machine learning methods for rf-based detection & classification
- Date: June 27, 2022
- currently explored, SVM, Logistic regression with PSD
- To dos:
    - using spectrogram features

In [1]:
import os
import numpy as np
from sklearn.model_selection import KFold
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

### Load Features

In [7]:
# Load Data & Features
feat_folder = '../Features/'
feat_file_name = 'SPEC_1024_2022-06-28.npy'
DATA = np.load(feat_folder+feat_file_name, allow_pickle=True).item()
# PSD_1024_2022-06-27.npy
Xs_arr = DATA['feat']
y_arr = DATA['drones']

In [8]:
## Apply normalization
X_norm = Xs_arr
for n in range(len(Xs_arr)):
    X_norm[n] = Xs_arr[n]/max(Xs_arr[n])
X_norm.shape

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [6]:
Xs_use = X_norm # Normalized features

## Train Test split

In [7]:
# split data into K-fold
k_fold = 10
cv = KFold(n_splits=k_fold, random_state=1, shuffle=True)

# model parameters
Cs=list(map(lambda x:pow(2,x),range(-15,16,1)))
gammas=list(map(lambda x:pow(2,x),range(-15,16,1)))

## SVM

In [None]:
best_params_ls = []
score_ls = []
parameters = {'C':Cs, 'gamma':gammas}
for train_ix, test_ix in cv.split(Xs_arr):
    
    # find the optimal hypber parameters
    svc = svm.SVC(kernel='rbf')
    clf = GridSearchCV(svc, parameters, n_jobs=1)
    clf.fit(Xs_use[train_ix], y_arr[train_ix])
    
    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)
    
    # predict on the test data
    y_pred = clf.predict(Xs_use[test_ix])
    acc = accuracy_score(y_arr[test_ix], y_pred)
    print(acc)
    score_ls.append(acc)

print(feat_file_name+': SVM K-fold average test score:', np.mean(score_ls))

## Logistic Regression

In [None]:
best_params_ls = []
score_ls = []

parameters = {'C':[0.01,0.1,1,10,100,1000,10000]}

for train_ix, test_ix in cv.split(Xs_arr):
    
    # find the optimal hypber parameters
    lr = LogisticRegression(max_iter=1000000)
    clf = GridSearchCV(lr, parameters, n_jobs=1)
    clf.fit(Xs_arr[train_ix], y_arr[train_ix])
    
    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)
    
    # predict on the test data
    y_pred = clf.predict(Xs_arr[test_ix])
    acc = accuracy_score(y_arr[test_ix], y_pred)
    print(acc)
    score_ls.append(acc)
    
print(feat_file_name+': LR K-fold average test score:', np.mean(score_ls))