In [1]:
import numpy as np 
import pandas as pd
from sklearn.neighbors.kde import KernelDensity
from sklearn.metrics import precision_recall_fscore_support as prf, accuracy_score
import torch
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn

In [2]:
class ArrhythmiaLoader(object):
    """Train/test split over a 2-D ``.npy`` array whose last column is the label.

    Rows labelled 0 are shuffled; the first ``N_train`` of them form the
    training split and the remaining ones go to the test split. Every row
    labelled 1 is appended to the test split, so the training split never
    contains a label-1 row.

    Args:
        data_path: Path handed to ``np.load``.
        N_train: Number of label-0 rows placed in the training split.
        mode: ``"train"`` serves the training split; any other value serves
            the test split.
        seed: Optional seed for the shuffle. When given, a private
            ``np.random.RandomState(seed)`` is used, so instances built with
            the same file, ``N_train`` and seed share exactly the same split
            (needed when a "train" and a "test" instance are created
            separately and must not overlap). When ``None`` the global
            ``np.random`` state is used, as before.
    """

    def __init__(self, data_path, N_train, mode="train", seed=None):
        self.mode = mode
        self.N_train = N_train

        data = np.load(data_path)
        labels = data[:, -1]
        features = data[:, :-1]

        label1_mask = labels == 1
        label0_mask = labels == 0
        label1_data, label1_labels = features[label1_mask], labels[label1_mask]
        label0_data, label0_labels = features[label0_mask], labels[label0_mask]

        # Random permutation of the label-0 rows; only these are split.
        randIdx = np.arange(label0_data.shape[0])
        if seed is None:
            np.random.shuffle(randIdx)
        else:
            np.random.RandomState(seed).shuffle(randIdx)

        train_idx = randIdx[:self.N_train]
        held_out_idx = randIdx[self.N_train:]

        self.train = label0_data[train_idx]
        self.train_labels = label0_labels[train_idx]

        # Test split = held-out label-0 rows followed by all label-1 rows.
        self.test = np.concatenate((label0_data[held_out_idx], label1_data), axis=0)
        self.test_labels = np.concatenate(
            (label0_labels[held_out_idx], label1_labels), axis=0)

    def __len__(self):
        """Return the number of rows in the split selected by ``self.mode``."""
        if self.mode == "train":
            return self.train.shape[0]
        return self.test.shape[0]

    def __getitem__(self, index):
        """Return ``(features, label)`` at ``index`` of the active split, cast to float32."""
        if self.mode == "train":
            return np.float32(self.train[index]), np.float32(self.train_labels[index])
        return np.float32(self.test[index]), np.float32(self.test_labels[index])

In [3]:
class MutilAE(nn.Module):
    """Fully connected autoencoder with a 20-dimensional bottleneck.

    The encoder narrows ``input_dim -> 130 -> 60 -> 25 -> 20`` with ``Tanh``
    after every layer; the decoder mirrors it back to ``input_dim`` and ends
    in ``Sigmoid``, so every reconstructed value lies in (0, 1).

    Args:
        input_dim: Width of each input row. Defaults to 274, the value that
            was previously hard-coded.
    """

    def __init__(self, input_dim=274):
        super(MutilAE, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 130),
            nn.Tanh(),
            nn.Linear(130, 60),
            nn.Tanh(),
            nn.Linear(60, 25),
            nn.Tanh(),
            nn.Linear(25, 20),
            nn.Tanh()
        )

        # NOTE(review): the final Sigmoid bounds outputs to (0, 1); this
        # presumably expects input features scaled to that range — confirm.
        self.decoder = nn.Sequential(
            nn.Linear(20, 25),
            nn.Tanh(),
            nn.Linear(25, 60),
            nn.Tanh(),
            nn.Linear(60, 130),
            nn.Tanh(),
            nn.Linear(130, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``(enc, dec)``: the bottleneck code and the reconstruction of ``x``."""
        enc = self.encoder(x)
        dec = self.decoder(enc)
        return enc, dec

In [4]:
def get_loader(data_path, batch_size, N_train, mode='train'):
    """Wrap an ``ArrhythmiaLoader`` split in a ``DataLoader``.

    A new ``ArrhythmiaLoader`` is constructed on every call. Batches are
    shuffled only when ``mode`` is ``'train'``; any other mode is iterated
    in stored order.
    """
    return DataLoader(
        dataset=ArrhythmiaLoader(data_path, N_train, mode),
        batch_size=batch_size,
        shuffle=(mode == 'train'),
    )

In [5]:
# Path of the .npy array read by ArrhythmiaLoader (its last column is the label).
data_path = 'Arrhythmia.npy'

# Mini-batch size handed to get_loader for training.
batch_size = 500
# Learning rate passed to the Adam optimizer.
learn_rate = 1e-4
# Size of the pool that Ratio is applied to
# (presumably the number of label-0 rows in the file — TODO confirm).
All_train = 386
# Fraction of All_train used as training rows.
Ratio = 0.2
N_train = int(Ratio * All_train)

In [None]:
# Experiment: autoencoder embedding + kernel-density anomaly scoring.
# For every repeat a fresh MutilAE is trained on the training split, a Gaussian
# KDE is fitted on the training embeddings, the test embeddings are scored, the
# lowest-density fraction is predicted as label 1, and accuracy / precision /
# recall / F1 are recorded. Repeats are averaged into one row per outer pass.
Ratio = 0.1
iter_per_epoch = 1000   # number of full passes over the training loader per model
Average_cycle = 10      # repeats averaged into one entry of diff_quantity_result
diff_quantity_result = []

for outer_run in range(20):
    # NOTE(review): the multiplier is the constant (7+1), so every outer pass
    # uses the same N_train. If a sweep over training-set sizes was intended it
    # must depend on the loop index — confirm before changing.
    N_train = int(All_train*Ratio*(7+1))
    result = []
    print(Ratio*(8))
    for repeat in range(Average_cycle):
        Mutil = MutilAE()
        loss_function = nn.MSELoss()
        optimizer = torch.optim.Adam(Mutil.parameters(), lr=learn_rate)

        data_loader_train = get_loader(data_path, batch_size, N_train, mode='train')
        # Every get_loader call builds a new ArrhythmiaLoader and therefore a new
        # random split. Take the train AND test arrays from the one dataset that
        # backs the training loader, so the KDE is fitted on exactly the rows
        # the model was trained on and no training row leaks into the test set.
        split = data_loader_train.dataset

        for epoch in range(iter_per_epoch):
            for input_data, _ in data_loader_train:
                enc, dec = Mutil(input_data)
                optimizer.zero_grad()
                loss = loss_function(dec, input_data)
                loss.backward()
                optimizer.step()

        # Encode both splits in one forward pass each. The global batch_size is
        # deliberately left untouched so later repeats train with the same
        # batch size as the first one.
        with torch.no_grad():
            train_enc, _ = Mutil(torch.from_numpy(split.train.astype(np.float32)))
            test_enc, _ = Mutil(torch.from_numpy(split.test.astype(np.float32)))
        train_enc = train_enc.numpy()
        test_enc = test_enc.numpy()
        test_labels = split.test_labels.astype(np.float32)

        # Density model of the training embeddings; score_samples returns the
        # log-density of each test embedding under it.
        kde = KernelDensity(kernel='gaussian', bandwidth=0.00001).fit(train_enc)
        test_score = kde.score_samples(test_enc)

        # g is the share of label-1 rows in the test set; the g-th percentile
        # of the scores is the cut-off, and scores below it are predicted as 1.
        g = np.sum(test_labels == 1) / len(test_labels)
        thresh = np.percentile(test_score, g*100)
        pred = (test_score < thresh).astype(int)
        gt = test_labels.astype(int)

        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average='binary')
        temp_result = [accuracy, precision, recall, f_score]
        result.append(temp_result)

    # Mean of [accuracy, precision, recall, f_score] over the repeats.
    end_result = np.mean(result, axis=0)
    diff_quantity_result.append(end_result)
    print(end_result)