## Notebook for machine learning methods for RF-based detection & classification
- Date: June 27, 2022
- Currently explored: SVM and logistic regression on PSD features
- To-dos:
    - Use spectrogram features

In [5]:
import os
import numpy as np
from sklearn.model_selection import KFold
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

### Load Features

In [6]:
# Read the pre-computed feature file from disk.
feat_folder = '../Features/'
feat_file_name = 'PSD_4096_2022-06-27.npy'
# Another feature file name noted here previously: PSD_1024_2022-06-27.npy

# The file is opened with allow_pickle=True and unwrapped with .item(), then
# indexed by key below. Unpickling can execute arbitrary code, so only load
# feature files that come from a trusted source.
feat_path = os.path.join(feat_folder, feat_file_name)
DATA = np.load(feat_path, allow_pickle=True).item()

# Feature matrix and the target labels used by the classifiers further down.
Xs_arr, y_arr = DATA['feat'], DATA['drones']

In [7]:
## Apply normalization
# Scale every row (one sample) by that row's own maximum value.
row_peak = np.max(Xs_arr, axis=1, keepdims=True)

# Dividing by a zero peak would fill the row with NaN/inf and only surface
# later as an obscure error inside the classifiers, so stop here instead.
if np.any(row_peak == 0):
    raise ValueError('cannot normalize: at least one feature row has a maximum of 0')

# Build a NEW array rather than writing into the loaded one, so the values
# stored in DATA['feat'] are left untouched and remain available as raw data.
X_norm = Xs_arr / row_peak

# Cells further down read Xs_arr directly; keep that name pointing at the
# normalized features so they see the same values as X_norm.
Xs_arr = X_norm
X_norm.shape

(390, 2049)

In [8]:
# Feature matrix handed to the models: the normalized features.
Xs_use = X_norm

## Train Test split

In [9]:
# Outer cross-validation splitter: shuffled K-fold with a fixed seed so the
# folds are the same on every run.
k_fold = 10
cv = KFold(n_splits=k_fold, shuffle=True, random_state=1)

# Hyper-parameter candidates: every power of two from 2**-15 up to 2**15.
Cs = [2 ** exponent for exponent in range(-15, 16)]
gammas = [2 ** exponent for exponent in range(-15, 16)]

## SVM

In [10]:
# Nested cross-validation for an RBF-kernel SVM: for each outer fold, C and
# gamma are grid-searched on the training part and the resulting model is
# scored on the held-out part.
parameters = {'C': Cs, 'gamma': gammas}
best_params_ls, score_ls = [], []

for train_ix, test_ix in cv.split(Xs_use):
    X_train, y_train = Xs_use[train_ix], y_arr[train_ix]
    X_test, y_test = Xs_use[test_ix], y_arr[test_ix]

    # find the optimal hyper-parameters using the training part only
    clf = GridSearchCV(svm.SVC(kernel='rbf'), parameters, n_jobs=1)
    clf.fit(X_train, y_train)

    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)

    # evaluate the fitted search object on the held-out fold
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(acc)
    score_ls.append(acc)

mean_acc = np.mean(score_ls)
print(feat_file_name+': SVM K-fold average test score:', mean_acc)

{'C': 256, 'gamma': 0.125}
0.9487179487179487
{'C': 1024, 'gamma': 0.0078125}
0.9487179487179487
{'C': 512, 'gamma': 0.0625}
0.9743589743589743
{'C': 256, 'gamma': 0.125}
1.0
{'C': 64, 'gamma': 1}
0.9743589743589743
{'C': 4096, 'gamma': 0.0078125}
0.9487179487179487
{'C': 1024, 'gamma': 0.015625}
0.9743589743589743
{'C': 2048, 'gamma': 0.015625}
1.0
{'C': 128, 'gamma': 0.25}
0.9743589743589743
{'C': 256, 'gamma': 0.125}
1.0
PSD_4096_2022-06-27.npy: SVM K-fold average test score: 0.9743589743589742


## Logistic Regression

In [11]:
# Nested cross-validation for logistic regression: for each outer fold, C is
# grid-searched on the training part and the resulting model is scored on the
# held-out part.
best_params_ls = []
score_ls = []

parameters = {'C':[0.01,0.1,1,10,100,1000,10000]}

# Index Xs_use (the matrix explicitly designated as the model input, and the
# one the SVM cell trains on) instead of Xs_arr, so this cell does not depend
# on what Xs_arr happens to contain at the time it runs.
for train_ix, test_ix in cv.split(Xs_use):

    # find the optimal hyper-parameters using the training part only
    lr = LogisticRegression(max_iter=1000000)
    clf = GridSearchCV(lr, parameters, n_jobs=1)
    clf.fit(Xs_use[train_ix], y_arr[train_ix])

    print(clf.best_params_)
    best_params_ls.append(clf.best_params_)

    # predict on the held-out fold
    y_pred = clf.predict(Xs_use[test_ix])
    acc = accuracy_score(y_arr[test_ix], y_pred)
    print(acc)
    score_ls.append(acc)

print(feat_file_name+': LR K-fold average test score:', np.mean(score_ls))

{'C': 10000}
0.9487179487179487
{'C': 10000}
0.9743589743589743
{'C': 10000}
0.9743589743589743
{'C': 10000}
1.0
{'C': 10000}
1.0
{'C': 1000}
1.0
{'C': 1000}
0.9743589743589743
{'C': 10000}
0.9743589743589743
{'C': 10000}
0.9743589743589743
{'C': 1000}
1.0
PSD_4096_2022-06-27.npy: LR K-fold average test score: 0.9820512820512821
