In [None]:
%load_ext autoreload
%autoreload 2
import time
import timeit

from biometric_system_euclidiano import BiometricSystem

from anomaly_detectors.M2005 import M2005 
from anomaly_detectors import thresholds
from data_stream import data_stream
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold

from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_similarity

from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import kneighbors_graph
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import networkx as nx


import json
import ipdb
import os, sys
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
import random
import copy
import IPython.display as ipd


def Average(lst):
    """Return the mean of ``lst``, rounding the sum to six decimals first.

    Args:
        lst: sized iterable of numbers.

    Returns:
        The rounded sum divided by the number of elements.

    Raises:
        ZeroDivisionError: if ``lst`` is empty.
    """
    total = round(sum(lst), 6)
    # round() on an int length is a no-op; kept to mirror the original form.
    count = round(len(lst))
    return total / count


def split_data_enrollment(dataset, column, n_samples):
    """Build the per-label enrollment frames.

    For every distinct value found in ``dataset[column]``, keeps the first
    ``n_samples // 2`` rows carrying that value, removes ``column`` itself
    and renumbers the index from zero.

    Args:
        dataset: pandas DataFrame that contains ``column``.
        column: name of the label column used for grouping and then dropped.
        n_samples: sample budget per label; only its first half is used here.

    Returns:
        dict mapping each distinct label to its DataFrame of remaining columns.
    """
    cutoff = n_samples // 2
    keep_columns = ~dataset.columns.isin([column])
    labels = dataset[column]

    enrollment = {}
    for label in labels.unique():
        rows = dataset.loc[labels == label]
        enrollment[label] = rows.iloc[:cutoff].loc[:, keep_columns].reset_index(drop=True)
    return enrollment

def split_data_validation(dataset, column, n_samples):
    """Build the per-label validation frames.

    For every distinct value found in ``dataset[column]``, keeps the rows at
    positions ``n_samples // 2`` up to (not including) ``n_samples`` among the
    rows carrying that value, removes ``column`` itself and renumbers the
    index from zero.

    Args:
        dataset: pandas DataFrame that contains ``column``.
        column: name of the label column used for grouping and then dropped.
        n_samples: sample budget per label; only its second half is used here.

    Returns:
        dict mapping each distinct label to its DataFrame of remaining columns.
    """
    start = n_samples // 2
    keep_columns = ~dataset.columns.isin([column])
    labels = dataset[column]

    validation = {}
    for label in labels.unique():
        rows = dataset.loc[labels == label]
        validation[label] = rows.iloc[start:n_samples].loc[:, keep_columns].reset_index(drop=True)
    return validation


def split_data_recognition(dataset, column, n_samples):
    """Build the per-label recognition (test stream) frames.

    For every distinct value found in ``dataset[column]``, keeps the first
    ``n_samples`` rows carrying that value, removes ``column`` itself and
    renumbers the index from zero.

    Args:
        dataset: pandas DataFrame that contains ``column``.
        column: name of the label column used for grouping and then dropped.
        n_samples: maximum number of rows kept per label.

    Returns:
        dict mapping each distinct label to its DataFrame of remaining columns.
    """
    keep_columns = ~dataset.columns.isin([column])
    labels = dataset[column]

    recognition = {}
    for label in labels.unique():
        rows = dataset.loc[labels == label]
        recognition[label] = rows.iloc[:n_samples].loc[:, keep_columns].reset_index(drop=True)
    return recognition

def split_data(dataset, column, n_samples):
    """Split each label's rows into enrollment, validation and recognition sets.

    For every distinct value found in ``dataset[column]`` the rows carrying
    that value are cut by position:

    * enrollment: the first ``n_samples // 2`` rows;
    * validation: rows ``n_samples // 2`` up to (not including) ``n_samples``;
    * recognition: every row from position ``n_samples`` onwards.

    ``column`` is removed from each piece and the index is renumbered from zero.

    Args:
        dataset: pandas DataFrame that contains ``column``.
        column: name of the label column used for grouping and then dropped.
        n_samples: number of leading rows per label reserved for
            enrollment plus validation.

    Returns:
        Tuple ``(enrollment, validation, recognition)`` of dicts, each mapping
        a distinct label to its DataFrame of remaining columns.
    """
    half = n_samples // 2
    keep_columns = ~dataset.columns.isin([column])
    labels = dataset[column]

    enrollment, validation, recognition = {}, {}, {}
    for label in labels.unique():
        rows = dataset.loc[labels == label]

        # Training: first half of the sample budget.
        enrollment[label] = rows.iloc[:half].loc[:, keep_columns].reset_index(drop=True)
        # Decision-threshold tuning: second half of the sample budget.
        validation[label] = rows.iloc[half:n_samples].loc[:, keep_columns].reset_index(drop=True)
        # Test stream: everything after the sample budget.
        recognition[label] = rows.iloc[n_samples:].loc[:, keep_columns].reset_index(drop=True)

    return enrollment, validation, recognition

#--------------------------------------------------------------------------------------------------------------#
# Experiment script: biometric system with the "GrowingWindow" adaptation
# strategy. Loads the dataset, enrolls the users, tunes the decision
# threshold, runs one test stream per user and reports the mean balanced
# accuracy together with the elapsed wall-clock time.
#
# START OF TIMER
inicio = timeit.default_timer()
#--------------------------------------------------------------------------------------------------------------#


# The first CSV column is used as the index; the 'rep' column is discarded.
df = pd.read_csv('dados/DSL-Modificado.csv', delimiter=',', index_col=[0])
df = df.drop(['rep'], axis=1)
users = df['subject'].unique()

# Experiment parameters.
perc = 0.5
impostor_rate = 0.30
rate_external_impostor = 0
R = 1  # seed passed as random_state to every component below
GRAPH_MIN_CUT_GROWING = []

#--------------------------------------------------------------------------------------------------------------#
# Session index split: the first session is used for training and validation,
# the remaining ones for testing.
# Depending on the test, the "Split by session index" section below has to be
# changed.

sessionIndex1 = 1

#--------------------------------------------------------------------------------------------------------------#
# User split

len_reg_users = int(len(users) * perc)

kfold = KFold(n_splits=2, shuffle=True, random_state=R)
splits = kfold.split(users)

#--------------------------------------------------------------------------------------------------------------#
# User registration
#
# NOTE(review): every iteration overwrites the previous one, so only the last
# fold's split is used downstream; and after the concat below `internal_users`
# holds the rows of every subject, not only the registered ones. Kept as-is
# because changing it would change the experiment's results - confirm intent.

for i, (reg_users, not_reg_users) in enumerate(splits):

    is_registered = df['subject'].isin(users[reg_users])

    # Boolean .loc selection followed by .copy() yields independent frames.
    internal_users = df.loc[is_registered].copy()
    external_users = df.loc[~is_registered].copy()

    frames = [internal_users, external_users]

    internal_users = pd.concat(frames)
#--------------------------------------------------------------------------------------------------------------#
# Split by session index
#
# drop() returns a new frame here instead of mutating a .loc slice in place,
# which avoids pandas' SettingWithCopyWarning.

dataS1 = internal_users.loc[internal_users['sessionIndex'] == sessionIndex1].drop(columns=["sessionIndex"])

dataS2 = internal_users.loc[internal_users['sessionIndex'] != sessionIndex1].drop(columns=["sessionIndex"])


#--------------------------------------------------------------------------------------------------------------#
# Data for training, validation and recognition

# Training: enrollment samples of each user
data_to_enrollment = split_data_enrollment(dataS1, column='subject', n_samples=50)

# Used to set the decision threshold of the classification algorithms
data_to_validation = split_data_validation(dataS1, column='subject', n_samples=50)

# Used to build the test stream
data_to_recognition = split_data_recognition(dataS2, column='subject', n_samples=350)

_, _, external_users_data = split_data(external_users, column='subject', n_samples=50)


#--------------------------------------------------------------------------------------------------------------#

# System with adaptation (GraphMinCut)

detector = M2005()
adaptive = "GrowingWindow"
system = BiometricSystem(detector=detector, random_state=R)
system.enrollment_grafos(dataset=data_to_enrollment, adaptive=adaptive)

decision_threshold = thresholds.best_threshold(data_to_validation, system, size=10, random_state=R)

metrics_adaptativo_grafo_growing = dict()

lista_nao_usadas_grafo_growing = list()
lista_usadas_grafo_growing = list()

auxiliar_euclidiano = {}

for j, genuine in enumerate(system.users.keys()):

    # Progress display: replace the previous iteration's output.
    ipd.clear_output(wait=True)
    print("Rodando GraphMinCutGrowing")
    print(f"Testando usuário {j+1}/{len(system.users.keys())}")

    datastream = data_stream.Random(impostor_rate=impostor_rate,
                                    rate_external_impostor=rate_external_impostor,
                                    random_state=R)

    (test_stream,
     y_true,
     amostras_grafo_growing_genuinas,
     amostras_grafo_growing_impostoras) = datastream.create(genuine,
                                                            data_to_recognition,
                                                            external_users_data)

    (y_pred,
     lista_nao_usadas_grafo_growing2,
     lista_usadas_grafo_growing2) = system.autenticate_grafos(genuine,
                                                              test_stream,
                                                              decision_threshold=decision_threshold,
                                                              adaptive_TESTE=adaptive,
                                                              return_scores=False)

    lista_nao_usadas_grafo_growing.append(lista_nao_usadas_grafo_growing2)
    lista_usadas_grafo_growing.append(lista_usadas_grafo_growing2)

    fmr, fnmr, b_acc, y_genuine, y_impostor = system.compute_metrics(y_true, y_pred)
    # Overwritten on every iteration: only the last user's counts survive.
    auxiliar_euclidiano = y_genuine.value_counts()[1], y_impostor.value_counts()[1]

    # Explicit name -> value mapping instead of eval() on variable names.
    metricas_usuario = {'fmr': fmr, 'fnmr': fnmr, 'b_acc': b_acc}
    registro = metrics_adaptativo_grafo_growing.setdefault(genuine, dict())
    for met, valor in metricas_usuario.items():
        registro.setdefault(met, []).append(valor)
    #json.dump(metrics_adaptativo_grafo_growing, open("metricas_grafos_growing.json", "w"))


# One row per user; each cell holds the list of values collected for a metric.
usuarios = metrics_adaptativo_grafo_growing.keys()
result = pd.DataFrame(metrics_adaptativo_grafo_growing.values())

fmr_mean = [Average(valores) for valores in result['fmr']]
fnmr_mean = [Average(valores) for valores in result['fnmr']]
b_acc_mean = [Average(valores) for valores in result['b_acc']]

metrics_adaptativo_grafo_growing_mean = pd.DataFrame(list(zip(usuarios, fmr_mean, fnmr_mean, b_acc_mean)),
            columns=['Usuarios', 'fmr_mean', 'fnmr_mean', 'b_acc_mean'])

GRAPH_MIN_CUT_GROWING.append(metrics_adaptativo_grafo_growing_mean['b_acc_mean'].mean())

#--------------------------------------------------------------------------------------------------------------#

# Elapsed wall-clock time, printed as HH:MM:SS.ss
fim = timeit.default_timer()
horas, rem = divmod(fim - inicio, 3600)
minutos, segundos = divmod(rem, 60)
print("{:0>2}:{:0>2}:{:05.2f}".format(int(horas), int(minutos), segundos))

print('---------------------')
print("Teste com Grafos distancia Euclidiana")
print("GRAPH_MIN_CUT_GROWING", GRAPH_MIN_CUT_GROWING)



#--------------------------------------------------------------------------------------------------------------# 

Rodando GraphMinCutGrowing
Testando usuário 1/51
> c:\users\murilo\documents\mestrado\sistemas mestrado - copia\sistema com grafos(tentativa de proposta)\biometric_system_euclidiano.py(223)Distancia_euclidiana()
    221         import pdb; pdb.set_trace()
    222 
--> 223         E_dist = euclidean_distances(Y, X)
    224         #E_dist.max()
    225 

ipdb> E_dist 
*** NameError: name 'E_dist' is not defined
ipdb> Y
[[0.3085, 0.3531, 0.4553, 0.2063, 0.3326, 0.2769, 0.4237, 0.1858, 0.4244, 0.4347, 0.5258, 0.3333, 0.7432, 0.7194, 0.8208, 0.6418, 0.8913, 0.9056, 0.9832, 0.8137, 0.3487, 0.3475999999999

ipdb> type(Y)
<class 'list'>
ipdb> len(Y)
26
ipdb> n
> c:\users\murilo\documents\mestrado\sistemas mestrado - copia\sistema com grafos(tentativa de proposta)\biometric_system_euclidiano.py(226)Distancia_euclidiana()
    224         #E_dist.max()
    225 
--> 226         result = np.where(E_dist == np.amax(E_dist))
    227 
    228         ponto1 = int(result[0])

ipdb> result 
*** NameError: name 'result' is not defined
ipdb> n
> [1;32mc:\users\murilo\documents\mestrado\sistemas mestrado - copia\sistema com grafos(tentati

ipdb> len(E_dist)
26


In [6]:
from biometric_system_euclidiano import BiometricSystem

from anomaly_detectors.M2005 import M2005 
from anomaly_detectors import thresholds
from data_stream import data_stream
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold

from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_similarity

from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import kneighbors_graph
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import networkx as nx


import json
import ipdb
import os, sys
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
import random
import copy
import IPython.display as ipd


def Average(lst):
    """Return the mean of ``lst``, rounding the sum to six decimals first.

    Args:
        lst: sized iterable of numbers.

    Returns:
        The rounded sum divided by the number of elements.

    Raises:
        ZeroDivisionError: if ``lst`` is empty.
    """
    total = round(sum(lst), 6)
    # round() on an int length is a no-op; kept to mirror the original form.
    count = round(len(lst))
    return total / count


def split_data_enrollment(dataset, column, n_samples):
    """Build the per-label enrollment frames.

    For every distinct value found in ``dataset[column]``, keeps the first
    ``n_samples // 2`` rows carrying that value, removes ``column`` itself
    and renumbers the index from zero.

    Args:
        dataset: pandas DataFrame that contains ``column``.
        column: name of the label column used for grouping and then dropped.
        n_samples: sample budget per label; only its first half is used here.

    Returns:
        dict mapping each distinct label to its DataFrame of remaining columns.
    """
    cutoff = n_samples // 2
    keep_columns = ~dataset.columns.isin([column])
    labels = dataset[column]

    enrollment = {}
    for label in labels.unique():
        rows = dataset.loc[labels == label]
        enrollment[label] = rows.iloc[:cutoff].loc[:, keep_columns].reset_index(drop=True)
    return enrollment

def split_data_validation(dataset, column, n_samples):
    """Build the per-label validation frames.

    For every distinct value found in ``dataset[column]``, keeps the rows at
    positions ``n_samples // 2`` up to (not including) ``n_samples`` among the
    rows carrying that value, removes ``column`` itself and renumbers the
    index from zero.

    Args:
        dataset: pandas DataFrame that contains ``column``.
        column: name of the label column used for grouping and then dropped.
        n_samples: sample budget per label; only its second half is used here.

    Returns:
        dict mapping each distinct label to its DataFrame of remaining columns.
    """
    start = n_samples // 2
    keep_columns = ~dataset.columns.isin([column])
    labels = dataset[column]

    validation = {}
    for label in labels.unique():
        rows = dataset.loc[labels == label]
        validation[label] = rows.iloc[start:n_samples].loc[:, keep_columns].reset_index(drop=True)
    return validation


def split_data_recognition(dataset, column, n_samples):
    """Build the per-label recognition (test stream) frames.

    For every distinct value found in ``dataset[column]``, keeps the first
    ``n_samples`` rows carrying that value, removes ``column`` itself and
    renumbers the index from zero.

    Args:
        dataset: pandas DataFrame that contains ``column``.
        column: name of the label column used for grouping and then dropped.
        n_samples: maximum number of rows kept per label.

    Returns:
        dict mapping each distinct label to its DataFrame of remaining columns.
    """
    keep_columns = ~dataset.columns.isin([column])
    labels = dataset[column]

    recognition = {}
    for label in labels.unique():
        rows = dataset.loc[labels == label]
        recognition[label] = rows.iloc[:n_samples].loc[:, keep_columns].reset_index(drop=True)
    return recognition

def split_data(dataset, column, n_samples):
    """Split each label's rows into enrollment, validation and recognition sets.

    For every distinct value found in ``dataset[column]`` the rows carrying
    that value are cut by position:

    * enrollment: the first ``n_samples // 2`` rows;
    * validation: rows ``n_samples // 2`` up to (not including) ``n_samples``;
    * recognition: every row from position ``n_samples`` onwards.

    ``column`` is removed from each piece and the index is renumbered from zero.

    Args:
        dataset: pandas DataFrame that contains ``column``.
        column: name of the label column used for grouping and then dropped.
        n_samples: number of leading rows per label reserved for
            enrollment plus validation.

    Returns:
        Tuple ``(enrollment, validation, recognition)`` of dicts, each mapping
        a distinct label to its DataFrame of remaining columns.
    """
    half = n_samples // 2
    keep_columns = ~dataset.columns.isin([column])
    labels = dataset[column]

    enrollment, validation, recognition = {}, {}, {}
    for label in labels.unique():
        rows = dataset.loc[labels == label]

        # Training: first half of the sample budget.
        enrollment[label] = rows.iloc[:half].loc[:, keep_columns].reset_index(drop=True)
        # Decision-threshold tuning: second half of the sample budget.
        validation[label] = rows.iloc[half:n_samples].loc[:, keep_columns].reset_index(drop=True)
        # Test stream: everything after the sample budget.
        recognition[label] = rows.iloc[n_samples:].loc[:, keep_columns].reset_index(drop=True)

    return enrollment, validation, recognition

#--------------------------------------------------------------------------------------------------------------#
# Experiment script: biometric system with the "SlidingWindow" adaptation
# strategy. Loads the dataset, enrolls the users, tunes the decision
# threshold, runs one test stream per user and reports the mean balanced
# accuracy together with the elapsed wall-clock time.

# This cell's own import list does not bring in timeit; import it here so the
# cell also runs in a fresh kernel.
import timeit

# START OF TIMER
inicio = timeit.default_timer()
#--------------------------------------------------------------------------------------------------------------#


# The first CSV column is used as the index; the 'rep' column is discarded.
df = pd.read_csv('dados/DSL-Modificado.csv', delimiter=',', index_col=[0])
df = df.drop(['rep'], axis=1)
users = df['subject'].unique()

# Experiment parameters.
perc = 0.5
impostor_rate = 0.30
rate_external_impostor = 0
R = 1  # seed passed as random_state to every component below
GRAPH_MIN_CUT_SLIDING = []

#--------------------------------------------------------------------------------------------------------------#
# Session index split: the first session is used for training and validation,
# the remaining ones for testing.
# Depending on the test, the "Split by session index" section below has to be
# changed.

sessionIndex1 = 1
#--------------------------------------------------------------------------------------------------------------#
# User split

len_reg_users = int(len(users) * perc)

kfold = KFold(n_splits=2, shuffle=True, random_state=R)
splits = kfold.split(users)

#--------------------------------------------------------------------------------------------------------------#
# User registration
#
# NOTE(review): every iteration overwrites the previous one, so only the last
# fold's split is used downstream; and after the concat below `internal_users`
# holds the rows of every subject, not only the registered ones. Kept as-is
# because changing it would change the experiment's results - confirm intent.

for i, (reg_users, not_reg_users) in enumerate(splits):

    is_registered = df['subject'].isin(users[reg_users])

    # Boolean .loc selection followed by .copy() yields independent frames.
    internal_users = df.loc[is_registered].copy()
    external_users = df.loc[~is_registered].copy()

    frames = [internal_users, external_users]

    internal_users = pd.concat(frames)

#--------------------------------------------------------------------------------------------------------------#
# Split by session index
#
# drop() returns a new frame here instead of mutating a .loc slice in place,
# which avoids pandas' SettingWithCopyWarning.
dataS1 = internal_users.loc[internal_users['sessionIndex'] == sessionIndex1].drop(columns=["sessionIndex"])

dataS2 = internal_users.loc[internal_users['sessionIndex'] != sessionIndex1].drop(columns=["sessionIndex"])
#--------------------------------------------------------------------------------------------------------------#
# Data for training, validation and recognition

# Training: enrollment samples of each user
data_to_enrollment = split_data_enrollment(dataS1, column='subject', n_samples=50)

# Used to set the decision threshold of the classification algorithms
data_to_validation = split_data_validation(dataS1, column='subject', n_samples=50)

# Used to build the test stream
data_to_recognition = split_data_recognition(dataS2, column='subject', n_samples=350)

_, _, external_users_data = split_data(external_users, column='subject', n_samples=50)



# System with adaptation (GraphMinCut)

detector = M2005()
adaptive = "SlidingWindow"
system = BiometricSystem(detector=detector, random_state=R)
system.enrollment_grafos(dataset=data_to_enrollment, adaptive=adaptive)

decision_threshold = thresholds.best_threshold(data_to_validation, system, size=10, random_state=R)

metrics_adaptativo_grafo_sliding = dict()

lista_nao_usadas_grafo_sliding = list()
lista_usadas_grafo_sliding = list()

auxiliar_euclidiano2 = {}

for j, genuine in enumerate(system.users.keys()):

    # Progress display: replace the previous iteration's output.
    ipd.clear_output(wait=True)
    print("Rodando GraphMinCutSliding Euclidiano")
    print(f"Testando usuário {j+1}/{len(system.users.keys())}")

    datastream = data_stream.Random(impostor_rate=impostor_rate,
                                    rate_external_impostor=rate_external_impostor,
                                    random_state=R)

    (test_stream,
     y_true,
     amostras_grafo_sliding_genuinas,
     amostras_grafo_sliding_impostoras) = datastream.create(genuine,
                                                            data_to_recognition,
                                                            external_users_data)

    (y_pred,
     lista_nao_usadas_grafo_sliding2,
     lista_usadas_grafo_sliding2) = system.autenticate_grafos(genuine,
                                                              test_stream,
                                                              decision_threshold=decision_threshold,
                                                              adaptive_TESTE=adaptive,
                                                              return_scores=False)

    lista_nao_usadas_grafo_sliding.append(lista_nao_usadas_grafo_sliding2)
    lista_usadas_grafo_sliding.append(lista_usadas_grafo_sliding2)

    fmr, fnmr, b_acc, y_genuine, y_impostor = system.compute_metrics(y_true, y_pred)
    # Overwritten on every iteration: only the last user's counts survive.
    auxiliar_euclidiano2 = y_genuine.value_counts()[1], y_impostor.value_counts()[1]

    # Explicit name -> value mapping instead of eval() on variable names.
    metricas_usuario = {'fmr': fmr, 'fnmr': fnmr, 'b_acc': b_acc}
    registro = metrics_adaptativo_grafo_sliding.setdefault(genuine, dict())
    for met, valor in metricas_usuario.items():
        registro.setdefault(met, []).append(valor)
    #json.dump(metrics_adaptativo_grafo_sliding, open("metricas_grafo_sliding.json", "w"))


# One row per user; each cell holds the list of values collected for a metric.
usuarios = metrics_adaptativo_grafo_sliding.keys()
result = pd.DataFrame(metrics_adaptativo_grafo_sliding.values())

fmr_mean = [Average(valores) for valores in result['fmr']]
fnmr_mean = [Average(valores) for valores in result['fnmr']]
b_acc_mean = [Average(valores) for valores in result['b_acc']]

metrics_adaptativo_grafo_sliding_mean = pd.DataFrame(list(zip(usuarios, fmr_mean, fnmr_mean, b_acc_mean)),
            columns=['Usuarios', 'fmr_mean', 'fnmr_mean', 'b_acc_mean'])

GRAPH_MIN_CUT_SLIDING.append(metrics_adaptativo_grafo_sliding_mean['b_acc_mean'].mean())

#--------------------------------------------------------------------------------------------------------------#

# Elapsed wall-clock time, printed as HH:MM:SS.ss
fim = timeit.default_timer()
horas, rem = divmod(fim - inicio, 3600)
minutos, segundos = divmod(rem, 60)
print("{:0>2}:{:0>2}:{:05.2f}".format(int(horas), int(minutos), segundos))

print('---------------------')
print("Teste com Grafos distancia Euclidiana")
print("GRAPH_MIN_CUT_SLIDING", GRAPH_MIN_CUT_SLIDING)

Rodando GraphMinCutSliding
Testando usuário 51/51
---------------------
Teste com Grafos distancia Euclidiana
GRAPH_MIN_CUT_SLIDING [0.673426705882353]
