In [1]:
import os
import random
import numpy as np
import pandas as pd
import sklearn.metrics as metrics

from os import listdir
from numpy import argmax
from sklearn.metrics import roc_auc_score, classification_report

import torch
import torch.nn as nn
import torch.utils.data 
import torch.nn.functional as F

from utils import CustomDataset, weights_init_normal
from model import Deep_SVDD, C_AutoEncoder


seed = 10
config = {
    'device':'0,1,2,3',
    'weight_decay' : 0.0001,
    'weight_decay_ae': 0.0001,
    'lr' : 1e-4,
    'lr_ae': 1e-4,
    'num_epochs' : 2,
    'num_epochs_ae': 3,
    'batch_size' : 1024,
    'pretrain' : True,
    'latent_dim': 27,
    
}

# --------------------------------------
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # if use multi-GPU
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
random.seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  # Arrange GPU devices starting from 0
os.environ["CUDA_VISIBLE_DEVICES"]=config['device']
    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")    
print('Device: %s' % device)
if (device.type == 'cuda') or (torch.cuda.device_count() > 1):
    print('GPU activate --> Count of using GPUs: %s' % torch.cuda.device_count())
config['device'] = device

normal_folder_path = '/data/COMPUTER_VISION/AMSU_PREP/Data/Normal/'
abnormal_folder_path = '/data/COMPUTER_VISION/AMSU_PREP/Data/Abnormal/'
TRAIN_Normal_list, VALID_Normal_list = [], []
TRAIN_Abnormal_list, VALID_Abnormal_list = [], []

for c in sorted(listdir(normal_folder_path)):
    if ('TRAIN' in c)&('pkl' in c): 
        TRAIN_Normal_list.append(normal_folder_path+c)
    elif ('VALID' in c)&('pkl' in c): 
        VALID_Normal_list.append(normal_folder_path+c)
        
for c in sorted(listdir(abnormal_folder_path)):
    if ('TRAIN' in c)&('pkl' in c): 
        TRAIN_Abnormal_list.append(abnormal_folder_path+c)
    elif ('VALID' in c)&('pkl' in c): 
        VALID_Abnormal_list.append(abnormal_folder_path+c)

Device: cuda
GPU activate --> Count of using GPUs: 4


In [2]:
class TrainerDeepSVDD:
    def __init__(self, config, data):
        self.config = config
        self.data_path = data
        self.device = config['device']
    
    def pretrain(self):
        ae = C_AutoEncoder(self.config['latent_dim']).to(self.device)
        ae.apply(weights_init_normal)
        ae = nn.DataParallel(ae).to(self.device)
            
        optimizer = torch.optim.AdamW(ae.parameters(), lr=self.config['lr_ae'], weight_decay=self.config['weight_decay_ae'])
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)
        
        ae.train()
        for epoch in range(self.config['num_epochs_ae']):
            
            for TRAIN_path in self.data_path:
                tr_dataset = CustomDataset(TRAIN_path)
                train_loader = torch.utils.data.DataLoader(dataset=tr_dataset, batch_size=config['batch_size'], shuffle=True, drop_last=True)
                total_loss = 0
                
                for train_data in train_loader:
                    x = train_data.to(self.device) 
        
                    optimizer.zero_grad()
                    x_hat = ae(x)
                    reconst_loss = torch.mean(torch.sum((x_hat - x) ** 2, dim=tuple(range(1, x_hat.dim()))))
                    reconst_loss.backward()
                    optimizer.step()
                    
                    total_loss += reconst_loss.item()
                scheduler.step()
                print('Pretraining Autoencoder... Epoch: {}, Loss: {:.3f}'.format(
                        epoch, total_loss/len(train_loader)))
                
            self.save_weights_for_DeepSVDD(ae, train_loader) 
    

    def save_weights_for_DeepSVDD(self, model, dataloader):
        c = self.set_c(model, dataloader)
        net = Deep_SVDD(self.config['latent_dim']).to(self.device)
        net = nn.DataParallel(net).to(self.device)
        
        state_dict = model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict()
        net.module.load_state_dict(state_dict, strict=False) if torch.cuda.device_count() > 1 else net.load_state_dict(state_dict, strict=False)

        torch.save({'center': c.cpu().data.numpy().tolist(),
                    'net_dict': net.state_dict()}, 'weights/pretrained_parameters.pth')
    

    def set_c(self, model, dataloader, eps=0.1):
        """Initializing the center for the hypersphere"""
        model.eval()
        z_ = []
        with torch.no_grad():
            for x in dataloader:
                x = x.to(self.device) 
                z = model.module.encoder(x) if torch.cuda.device_count() > 1 else model.encoder(x)
                z_.append(z.detach())
        z_ = torch.cat(z_)
        c = torch.mean(z_, dim=0)
        c[(abs(c) < eps) & (c < 0)] = -eps
        c[(abs(c) < eps) & (c > 0)] = eps
        return c


    def train(self):
        """Training the Deep SVDD model"""
        net = Deep_SVDD(self.config['latent_dim']).to(self.device)
        net = nn.DataParallel(net).to(self.device)
        
        if self.config['pretrain']==True:
            state_dict = torch.load('weights/pretrained_parameters.pth')
            net.module.load_state_dict(state_dict['net_dict'], strict=False) if torch.cuda.device_count() > 1 else net.load_state_dict(state_dict['net_dict'], strict=False)
            c = torch.Tensor(state_dict['center']).to(self.device)
        else:
            net.apply(weights_init_normal)
            c = torch.randn(int(self.config['latent_dim']/8)).to(self.device)
        
        optimizer = torch.optim.AdamW(net.parameters(), lr=self.config['lr'], weight_decay=self.config['weight_decay'])
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)

        net.train()
        for epoch in range(self.config['num_epochs']):
            for TRAIN_path in self.data_path:
                tr_dataset = CustomDataset(TRAIN_path)
                train_loader = torch.utils.data.DataLoader(dataset=tr_dataset, batch_size=config['batch_size'], shuffle=True, drop_last=True)
                total_loss = 0
                
                for train_data in train_loader:
                    x = train_data.to(self.device) 

                    optimizer.zero_grad()
                    z = net(x)
                    loss = torch.mean(torch.sum((z - c) ** 2, dim=1))
                    loss.backward()
                    optimizer.step()

                    total_loss += loss.item()
                scheduler.step()
                print('Training Deep SVDD... Epoch: {}, Loss: {:.3f}'.format(
                        epoch, total_loss/len(train_loader)))
                
                state_dict = net.state_dict()
                torch.save({'center': c.cpu().data.numpy().tolist(),
                            'net_dict': net.state_dict()}, 'weights/parameters_deepsvdd.pth')  
                net.module.load_state_dict(state_dict, strict=False) if torch.cuda.device_count() > 1 else net.load_state_dict(state_dict, strict=False)
            
        self.net = net
        self.c = c

In [3]:
deep_SVDD = TrainerDeepSVDD(config, TRAIN_Normal_list[:30])

if config['pretrain']:
    deep_SVDD.pretrain()
deep_SVDD.train()

Pretraining Autoencoder... Epoch: 0, Loss: 10.022
Pretraining Autoencoder... Epoch: 0, Loss: 8.042
Pretraining Autoencoder... Epoch: 0, Loss: 8.152
Pretraining Autoencoder... Epoch: 0, Loss: 10.193
Pretraining Autoencoder... Epoch: 0, Loss: 10.150
Pretraining Autoencoder... Epoch: 0, Loss: 8.228
Pretraining Autoencoder... Epoch: 0, Loss: 8.231
Pretraining Autoencoder... Epoch: 0, Loss: 10.294
Pretraining Autoencoder... Epoch: 0, Loss: 10.269
Pretraining Autoencoder... Epoch: 0, Loss: 8.252
Pretraining Autoencoder... Epoch: 0, Loss: 8.422
Pretraining Autoencoder... Epoch: 0, Loss: 10.435
Pretraining Autoencoder... Epoch: 0, Loss: 10.452
Pretraining Autoencoder... Epoch: 0, Loss: 8.312
Pretraining Autoencoder... Epoch: 0, Loss: 8.402
Pretraining Autoencoder... Epoch: 0, Loss: 10.412
Pretraining Autoencoder... Epoch: 0, Loss: 10.363
Pretraining Autoencoder... Epoch: 0, Loss: 8.268
Pretraining Autoencoder... Epoch: 0, Loss: 8.397
Pretraining Autoencoder... Epoch: 0, Loss: 10.489
Pretrainin

In [7]:
def eval(net, c, data_list, dset_class):
    
    dset_class = dset_class
    dset = CustomDataset(data_list)
    dset_loader = torch.utils.data.DataLoader(dataset=dset, batch_size=config['batch_size'], shuffle=False, drop_last=True)
    result_df = pd.DataFrame(columns=['class', 'loss'])
    scores = []
        
    net.eval()
    with torch.no_grad():
        for x in dset_loader:
            x = x.to(device)
            z = net(x)
            score = torch.sum((z - c) ** 2, dim=1)
            scores.extend(score.detach().cpu().numpy())
            
        result_df['loss'] = scores
        result_df['class'] = dset_class
    
    return result_df


concat_df = pd.DataFrame(columns=['class', 'loss'])
for Normal_path, Abnormal_path in zip(VALID_Normal_list, VALID_Abnormal_list):

    normal_result = eval(deep_SVDD.net, deep_SVDD.c, Normal_path, 'Normal')
    abnormal_result = eval(deep_SVDD.net,deep_SVDD.c, Abnormal_path, 'Abnormal')
    concat_df = pd.concat([concat_df, normal_result, abnormal_result], axis=0).reset_index(drop=True) 

change_value_dict = {'Normal':0, 'Abnormal':1}
concat_df = concat_df.replace({'class':change_value_dict})

fpr, tpr, thresholds = metrics.roc_curve(concat_df['class'], concat_df['loss'])
J = tpr - fpr
ix = argmax(J)
best_threshold = thresholds[ix]

concat_df['predict_class'] = pd.DataFrame(concat_df['loss']).apply(lambda x: [1 if y >= best_threshold else 0 for y in x])
class_report = classification_report(concat_df['class'], concat_df['predict_class'], target_names=['Normal', 'Abnormal'], output_dict=True)

f1_score = class_report["accuracy"]

Testing...
Testing...


In [16]:
class_report['macro avg']

{'precision': 0.9998805272707689,
 'recall': 0.9998738395336857,
 'f1-score': 0.9998771683052723,
 'support': 11748352}