 # Darknet Experiment

## Train the model

In [62]:
import numpy as np

org_dataset = np.load('drk_train.npy')
num_components = len(org_dataset)
org_dataset = np.array(org_dataset, dtype='float')

In [63]:
mean_vector = np.mean(org_dataset, axis=0)
dataset = org_dataset - mean_vector

In [64]:
from sklearn.decomposition import PCA
pca = PCA()
pca.fit_transform(dataset)
eig_vectors = pca.components_
eig_vectors

array([[-9.26686268e-02,  2.18674800e-01,  7.11552464e-02, ...,
         2.81576714e-02,  1.68889283e-01,  1.38801445e-01],
       [ 1.50134123e-02, -1.16753499e-01,  2.45395218e-01, ...,
        -3.88579634e-02, -4.66591686e-02, -2.72408536e-02],
       [ 6.19410055e-02,  7.82407995e-02,  2.50027346e-01, ...,
         4.89071260e-03, -3.17631709e-02, -2.14592807e-02],
       ...,
       [ 1.23505384e-08, -9.90638648e-09,  2.08366938e-09, ...,
         3.20654147e-09, -8.16094818e-09,  3.05983641e-09],
       [ 6.01062421e-16, -6.87622221e-16,  7.59483803e-16, ...,
        -3.19189120e-16,  1.05471187e-15,  4.16333634e-17],
       [ 0.00000000e+00,  3.45209972e-16, -2.27689644e-15, ...,
        -1.11022302e-16, -2.01227923e-16,  4.16333634e-17]])

In [65]:
ratio = pca.explained_variance_ratio_

### Find the eigenvectors that describe most of the signals

In [66]:
q = 0.80
k = 0
energy = 0

while energy < q:
    energy = energy+ratio[k]
    k=k+1

print("Number of components to capture "+ str(q*100)+"% energy:", k)
eigen_faces = eig_vectors

Number of components to capture 80.0% energy: 13


In [67]:
reduced_data = np.array(eigen_faces[:k]).transpose()

## Compute the weights for the train samples

In [68]:
w = np.array([np.dot(reduced_data.transpose(),i) for i in dataset])

## Test an unknown sample

In [69]:
test = np.load('drk_test.npy')

In [70]:
test_norm = test - mean_vector

In [71]:
w_unknown = np.dot(reduced_data.transpose(), test_norm[6000])
w_unknown

array([ 1.27950857,  1.08107416, -2.04226863, -0.44172001,  2.34844625,
       -1.69356681, -0.87441444,  0.67783362, -0.87740178,  0.01691007,
        0.64644521,  0.72249603,  0.27772603])

In [72]:
diff  = w - w_unknown
norms = np.linalg.norm(diff, axis=1)
min(norms)

0.04486125769152447

## Use the validation set to tune the hyperparameter bound

In [73]:
test = np.load('drk_vali.npy')
test_norm = test - mean_vector

count   = 0
correct_pred = 0
wrong_neg = 0
true_neg = 0
wrong_pos = 0
true_pos = 0
params = list()

def recogniser(vector, proj_data, w, bound, error):
    global count, correct_pred, wrong_neg, true_neg, wrong_pos, true_pos
    count += 1

    w_unknown = np.dot(proj_data.transpose(), vector)

    if error:
        vector = np.random.normal(0, 1/3, w_unknown.shape[0])
        w_unknown = w_unknown + error * vector/np.linalg.norm(vector) * np.linalg.norm(w_unknown)

    diff  = w - w_unknown


    norms = np.linalg.norm(diff, axis=1)
    params = np.min(norms)

    if(count > 5000):
        if(params <= bound):
            correct_pred+=1
            true_neg+=1
        else:
            wrong_pos +=1
    if(count <= 5000):
        if(params > bound):
            correct_pred+=1
            true_pos+=1
        else:
            wrong_neg +=1

bound = 0.0
number = list()
predictions = list()
predictions_f1 = list()
c=0
for j in range(0, 130, 1):
    for i in test_norm:
        recogniser(i, reduced_data, w, bound+j/100, error=0.00)
        c= c+1
    predictions.append(correct_pred/count*100.00)
    acc = correct_pred/count
    pre = true_pos/(wrong_neg+true_pos)
    rec = true_pos/(wrong_pos+true_pos)
    predictions_f1.append(2*pre*rec/(pre+rec)*100.00)
    count   = 0
    correct_pred = 0
    wrong_neg = 0
    true_neg = 0
    wrong_pos = 0
    true_pos = 0
    number.append(j/100)
print('finish')

## Maximize Accuracy and F1 score

In [75]:
np.argmax(predictions)/100

In [76]:
np.argmax(predictions_f1)/100

In [77]:
bound = predictions_f1[np.argmax(predictions_f1)]

In [78]:
import matplotlib.pyplot as plt

plt.plot(number, predictions, label='Accuracy Validation')
plt.plot(number, predictions_f1, label='F1 Validation')
plt.legend()

plt.xlabel('Bounds')
plt.ylabel('Accuracy / F1')
plt.title('Histogram')
plt.grid(True)
plt.show()

# Test dataset

In [87]:
test = np.load('drk_test.npy')
test_norm = test - mean_vector
count   = 0
correct_pred = 0
wrong_neg = 0
true_neg = 0
wrong_pos = 0
true_pos = 0
params = list()

def recogniser(vector, proj_data, w, error):
    global count, correct_pred, wrong_neg, true_neg, wrong_pos, true_pos
    count += 1

    w_unknown = np.dot(proj_data.transpose(), vector)

    diff  = w - w_unknown

    norms = np.linalg.norm(diff, axis=1)

    if error:
        value = np.random.normal(0, error/3, norms.shape[0])
        norms = norms + value*norms

    params = np.min(norms)

    if(count > 5000):
        if(params <= bound):
            correct_pred+=1
            true_neg+=1
        else:
            wrong_pos +=1
    if(count <= 5000):
        if(params > bound):
            correct_pred+=1
            true_pos+=1
        else:
            wrong_neg +=1

print(len(test_norm))
for i in test_norm:
    recogniser(i, reduced_data, w, error=0.1)

acc = correct_pred/count
pre = true_pos/(wrong_neg+true_pos)
rec = true_pos/(wrong_pos+true_pos)
f1 = 2*pre*rec/(pre+rec)
print('Accuracy: {}/{} = {}%'.format(correct_pred, count, acc*100.00))
print('Precision: {}/{} = {}%'.format(true_pos, (wrong_neg+true_pos), pre*100.00))
print('Recall: {}/{} = {}%'.format(true_pos, (wrong_pos+true_pos), rec*100.00))
print('F1 score: {}%'.format(f1*100.00))

10000
Accuracy: 9868/10000 = 98.68%
Precision: 4926/5000 = 98.52%
Recall: 4926/4984 = 98.83627608346708%
F1 score: 98.6778846153846%
