In [177]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import mode

class DANN:
    def __init__(self):
        self.X = None
        self.y = None
        self.num_surrounding_points = None
        self.epsilon = None
    def fit(self, X, y, size=50, epsilon=1):
        self.X = X
        self.y = y
        self.num_surrounding_points = size
        self.epsilon = epsilon
        return self
    def predict(self, x, k=10): 
        n_features = len(x)
        distances = []
        for row in self.X:
            distances.append(np.linalg.norm(row - x))
        distances = np.array(distances)
        nearest_neighbors = np.argsort(distances)[:self.num_surrounding_points]
        neighbors_dat = self.X[nearest_neighbors, :]
        neighbors_dat_mean = neighbors_dat.mean(axis=0)
        neighbors_labels = self.y[nearest_neighbors]
        neighbors_unique_labels = np.unique(neighbors_labels)
        class_freq_dat = {}
        within_class_covariance = np.zeros((n_features, n_features))
        between_class_covariance = np.zeros((n_features, n_features))
        
        for curr_class in neighbors_unique_labels:
            class_index = np.where(neighbors_labels == curr_class)[0]
            class_freq_dat[curr_class] = np.sum(neighbors_labels == curr_class) / self.num_surrounding_points
            class_covariance = np.cov(neighbors_dat[class_index, :], rowvar=False)
            within_class_covariance += class_covariance * class_freq_dat[curr_class]
            class_mean = neighbors_dat[class_index, :].mean(axis=0)
            between_class_covariance += np.outer(class_mean - neighbors_dat_mean, class_mean - neighbors_dat_mean) * \
                class_freq_dat[curr_class]
        
        W_mat = np.linalg.pinv(np.nan_to_num(np.power(within_class_covariance, 0.5)))
        B_mat = np.dot(W_mat, between_class_covariance).dot(W_mat)
        I = np.identity(n_features)
        sigma = W_mat.dot(B_mat + self.epsilon * I).dot(W_mat)
        distances = []
        
        for row in self.X:
            distances.append(self.dann_internal_dist(x, row, sigma))
        distances = np.array(distances)
        nearest_k = distances.argsort()[:k]
        pred = mode(self.y[nearest_k]).mode[0]
        
        return pred  
        
    def dann_internal_dist(self, x0, x1, sigma):
        diff = x0 - x1
        internal_dist = diff.T.dot(sigma).dot(diff)
        return internal_dist

In [141]:
train_data = pd.read_csv("data/train_data.txt", header=None)
train_data.sample(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,25,26,27,28
991,39,1.304,6.5e-05,0.589,0.473,1.766,7.492,0.763,3.021,4.152,...,214.282,54,52,0.005004,0.000397,24.39,1,6.745,1,0
537,21,3.575,0.000336,1.264,1.124,3.793,10.458,0.831,6.216,8.278,...,111.694,11,10,0.009404,0.000838,67.742,0,0.0,1,0
404,16,2.056,0.000178,0.856,0.969,2.569,15.416,1.391,6.343,8.475,...,136.638,128,126,0.008661,0.000686,30.909,1,9.747,55,1
878,34,2.272,0.000182,0.81,1.256,2.43,15.819,1.42,7.219,8.526,...,138.615,60,59,0.007999,0.000693,2.083,0,0.0,1,0
244,10,4.753,0.00056,1.891,1.735,5.672,28.335,1.906,12.801,13.152,...,101.218,13,11,0.011778,0.001434,59.649,1,42.75,12,1
851,33,1.775,0.000104,0.588,0.713,1.765,18.226,1.475,9.254,15.479,...,465.093,62,57,0.005842,0.001221,35.0,1,28.922,1,0
605,24,1.353,0.000123,0.237,0.339,0.712,8.809,0.816,2.9,4.838,...,115.652,26,25,0.009086,0.000514,21.951,0,0.0,1,0
350,14,4.014,0.000248,1.702,1.436,5.105,11.446,1.029,5.399,4.668,...,196.648,37,35,0.006172,0.001154,31.579,1,18.632,5,1
12,1,2.105,0.00011,0.902,1.076,2.707,6.163,0.718,2.403,3.163,...,217.449,107,104,0.005208,0.000646,0.0,0,0.0,23,1
517,20,1.845,0.0001,1.073,1.096,3.219,11.451,1.052,5.815,6.652,...,207.612,59,58,0.005399,0.000238,19.512,0,0.0,46,1


In [142]:
X_dat = train_data[np.arange(2,28)]
X_dat

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,18,19,20,21,22,23,24,25,26,27
0,0.000090,0.900,0.794,2.699,8.334,0.779,4.517,4.609,6.802,13.551,...,142.229,187.576,160,159,0.006065,0.000416,0.000,0,0.000,23
1,0.000038,0.353,0.376,1.059,5.864,0.642,2.058,3.180,7.194,6.175,...,159.515,234.505,170,169,0.005181,0.000403,2.247,0,0.000,23
2,0.000074,0.732,0.670,2.196,8.719,0.875,4.347,5.166,7.548,13.040,...,146.445,211.442,1431,1427,0.006071,0.000474,10.656,1,0.178,23
3,0.000123,1.156,1.634,3.469,13.513,1.273,5.263,8.771,16.779,15.789,...,182.713,220.230,94,92,0.004910,0.000320,0.000,0,0.000,23
4,0.000167,1.715,1.539,5.145,9.112,1.040,3.102,4.927,12.823,9.307,...,182.821,225.162,117,114,0.004757,0.000380,18.182,1,13.318,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1035,0.000398,2.721,2.510,8.162,9.846,0.965,4.593,6.806,8.303,13.780,...,96.145,158.591,63,60,0.007766,0.001066,24.194,2,15.113,1
1036,0.000133,0.808,0.690,2.425,7.571,0.740,2.720,3.927,11.928,8.160,...,121.490,143.797,47,44,0.007561,0.000430,46.053,1,25.792,1
1037,0.000226,1.141,1.266,3.423,15.489,1.456,6.433,10.138,14.239,19.300,...,95.921,149.641,79,77,0.007767,0.000881,20.000,1,4.574,1
1038,0.000275,1.786,1.484,5.359,15.702,1.367,6.374,11.094,18.818,19.123,...,90.051,451.366,75,70,0.007257,0.001714,16.667,3,19.575,1


In [143]:
y_dat = train_data[28]
y_dat

0       1
1       1
2       1
3       1
4       1
       ..
1035    0
1036    0
1037    0
1038    0
1039    0
Name: 28, Length: 1040, dtype: int64

In [178]:
dann = DANN().fit(X_dat.values, y_dat)

In [179]:
import warnings
warnings.filterwarnings('ignore')

preds = []
for row in X_dat.values:
    preds.append(dann.predict(row))

In [180]:
preds

[0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
