 # Desconsiderando os registros que possuam grande concordância ou pouquíssima concordância

In [4]:
import os
import re
import numpy as np
import math
import pandas as pd
from collections import Counter
from sklearn import model_selection
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold

############################################################################################################################
#Definição de funções
############################################################################################################################

#Função para geração do F1 médio
def geraF1(toClass, classificador): #toClass é o conjunto de treinamento
    
    f1 = 0
    
    if classificador == 'SVM':
        modelo = SVC(random_state = seed)
    elif classificador == 'DT':
        modelo = DecisionTreeClassifier(random_state = seed)
    
    #Separação do conjunto X e y
    XtoClass = toClass.iloc[:,1:]
    ytoClass = toClass.duplicata
    
    #Divisão dos conjuntos de treino e teste (20% para esse último)
    X_train, X_test, y_train, y_test = model_selection.train_test_split(XtoClass, ytoClass, test_size=0.20, random_state=seed)
    
    #Validação cruzada
    #This cross-validation object is a variation of KFold that returns stratified folds. The folds are made by preserving the percentage of samples for each class.
    kfold = StratifiedKFold(n_splits=5, random_state=seed) #n_splits igual ao tamanho do conjunto de treino?
    
    try:
        cv_results = model_selection.cross_val_score(modelo, X_train, y_train, cv=kfold, scoring='f1')
    except:
        print("ERRO NA VALIDAÇÃO CRUZADA!")
        print("Conjunto de treinamento:")
        print(toClass)
        
    f1 = cv_results.mean()
    std = cv_results.std()
    coef_var = std/f1
#     tamTreino = len(XtoClass)
    
    return f1, std, coef_var#, tamTreino

#Função para geração do conjunto de treinamento
def geraTrainSet(ct, dir, file1):

    #Desnecessária essa parte se quiser deixar a primeira coluna com o status das duplicatas
    cols = list(ct.columns.values)
    cols.pop(cols.index('duplicata'))
    ct = ct[cols+['duplicata']]
    
    ct.to_csv(dir+file1, sep=';', index=False)     
    
#Função para geração do conjunto de teste
def geraTestSet(ct, dir, file1):

    #Desnecessária essa parte se quiser deixar a primeira coluna com o status das duplicatas
    cols = list(ct.columns.values)
    cols.pop(cols.index('duplicata'))
    ct = ct[cols+['duplicata']]
    
    ct.to_csv(dir+file1, sep=';', index=False)            
    
############################################################################################################################
#Repetições para os experimentos começam aqui
############################################################################################################################

#Parâmetros do usuário (Definir entrada dos dados)
estat_ord = 'min' #Estatística para ordenamento
qtd_alg = 23 #Quantidade total de algoritmos
qtd_alg_nd = 2 #Quantidade máxima de algoritmos para separar o conjunto de possíves não-duplicadas
k = 3 #Tamanho da janela
orcamento = 100 #Ou uma porcentagem da base de dados
orcamento_orig = orcamento
tam_min_ct = 20
seed = 500

etapa = '2 - AA[dg-arj]'

dirOrig = "../../csv/conjuntosDS/conjuntosDiverg/"
estat = "../../csv/estatisticaInicialDS.csv"

estatisticas = pd.read_csv(estat, index_col=['algoritmosUtilizados', 'etapa', 'permutacao'], sep=';')

arquivos = [] #Adicionado depois

for _, _, arquivo in os.walk(dirOrig):
     #print(arquivo)
     arquivos.extend(arquivo)   

for arq in arquivos:
    
    if '_NEW' in arq:
        print("##################################################################")
        print("Analisando o arquivo: {0}".format(arq))
        print("##################################################################")

        num = re.sub('diverg.*\)', r'', arq) #Alterar para fazer a substituição de tudo em uma linha só
        num = num.replace('_NEW.csv','')

        algUtl = re.sub('diverg.*\(', r'', arq) #Alterar para fazer a substituição de tudo em uma linha só
        algUtl = re.sub('\).*', r'', algUtl) #Alterar para fazer a substituição de tudo em uma linha só
        algUtl = int(algUtl)

        permutacao = int(num)
    
        linhaAtual = estatisticas.xs((algUtl, '1 - acm diverg', permutacao))    
            
        ###### Leitura do conjunto de pares conflitantes
        pc = dirOrig+arq
        
        pc = pd.read_csv(pc, sep=';', index_col=['elemento1', 'elemento2']) #pares conflitantes

        cols = list(pc.columns.values)
        cols.pop(cols.index('duplicata'))
        pc = pc[['duplicata']+cols]

        pc_aa = pc.iloc[:, :5 ] #Conjunto onde serão aplicadas as janelas deslizantes
        pc_vetores = pc.iloc[:, 5: ] #Conjunto base para compor o conjunto treinamento 

        # pc_aa = pc.loc[:, :'med' ]
        # pc_vetor = pc.loc[:, 'med': ] #Como referenciar a coluna vizinha à 'med'?

        duplicata = pc_aa.loc[:, 'duplicata' ]

        #Adicionando a coluna de duplicatas a pc_vetores
        pc_vetores = pd.concat([duplicata, pc_vetores], axis=1, ignore_index=False)

        ###### 
        
        #Separação do conjunto de pares conflitantes em dois a partir da quantidade de algoritmos 
        #que aponta o par como possível duplicata 
        #(conjunto A com quantidade de algoritmos = 1, com maioria composta por possíveis não-duplicatas,
        #e conjunto B com quantidade de algoritmos > 1, contendo mais possíveis duplicatas que o conjunto A)
        ndup = pc_aa.loc[pc['qtdAlg'] <= qtd_alg_nd]
        dup = pc_aa.loc[(pc['qtdAlg'] > qtd_alg_nd) & (pc['qtdAlg'] <= (qtd_alg - qtd_alg_nd - 1))]
        # dup = dup[pc['qtdAlg'] <= (qtd_alg - qtd_alg_nd)]

        ###### Ordenamento dos pares pela estatística selecionada
        ndup = ndup.sort_values(estat_ord) 
        dup = dup.sort_values(estat_ord, ascending=False)

        ###### Variáveis para verificar se pode deslizar a janela dentro de dup (ou ndup)

        deslz_dup = len(dup)/k
        deslz_dup

        deslz_ndup = len(ndup)/k
        deslz_ndup
        
        ###### Validação do tamanho para deslizamento
        
        #Se não houver espaço suficiente para deslizar as janelas
        if (deslz_dup < tam_min_ct/2) | (deslz_ndup < tam_min_ct/2):
            you_shall_not_pass = True
        else:
            you_shall_not_pass = False
        
        ###### Criação do dataframe que armazenará o conjunto de treinamento

        conj_treino = pd.DataFrame(columns=pc_vetores.columns.values)

        ###### Povoamento inicial sem aleatoriedade no except

        import sys

        orcamento = 100
        conj_treino = pd.DataFrame(columns=pc_vetores.columns.values)

        continua = True
        f1_anterior = 0
        f1_atual = 0
        f1_svm = 0
        f1_dt = 0

        jan_inic_ndup = 0 
        jan_fin_ndup = k

        jan_inic_dup = 0 
        jan_fin_dup = k

        cont = 0

        entrouExcept = False

        continua = True


        #Povoamento inicial com 20 pares rotulados (10 de cada)
        while (cont < tam_min_ct) & (continua):

            print("cont < 20: {0} - continua: {1}".format(cont < tam_min_ct, continua))

            print("orcamento: {0} - jan_inic_dup: {1} - jan_fin_ndup: {2}".format(orcamento, jan_inic_dup, jan_fin_ndup))

            #Desliza-se a janela
            reexecuta = True

            deslz_ndup -= 1 #Deslizou a janela -> Diminuiu o espaço para ela rodar

            print("reexecuta: {0} - deslz_ndup >= 1: {1}".format(reexecuta, deslz_ndup >= 1))

            while (reexecuta) & (deslz_ndup >= 1):

                grupo = ndup.iloc[jan_inic_ndup:jan_fin_ndup] # three rows of dataframe

                #SELEÇÃO DA POSSÍVEL NÃO-DUPLICATA
                #Seleciona-se o par com menor quantidade de pares em concordância (talvez selecionar o que teve maior concordância também?) para rotulação

                try:

                    id_row_sel = grupo['min'].idxmax() #Retorna o índice da linha com maior valor da coluna ('min') especificada

                    #Só passa pra cá se não lançar except
                    reexecuta = False
                    #Rotula o par, retira do conjunto U (Nesse caso "pc_vetores")
                    vetor_sel = pc_vetores.loc[id_row_sel]
                    conj_treino = conj_treino.append(vetor_sel) #Adicionando o vetor selecionado ao conjunto treino
                    orcamento -= 1


                except ValueError:

                    print("Atualizando janelas no except")
                    jan_inic_ndup = jan_inic_ndup + k
                    jan_fin_ndup = jan_fin_ndup + k

                    deslz_ndup -= 1 #Deslizou a janela -> Diminuiu o espaço para ela rodar

                    print("orcamento: {0} - jan_inic_ndup: {1} - jan_fin_ndup: {2}".format(orcamento, jan_inic_ndup, jan_fin_ndup))
                    print("orcamento: {0} - jan_inic_dup: {1} - jan_fin_dup: {2}".format(orcamento, jan_inic_dup, jan_fin_dup))


                    reexecuta = True

                except:
                    print('Eita!')

            print("Possível não-duplicata")
            print(vetor_sel)

            reexecuta = True

            #SELEÇÃO DA POSSÍVEL DUPLICATA
            #Seleciona-se o par com menor quantidade de pares em concordância (talvez selecionar o que teve maior concordância também?) para rotulação

            deslz_dup -= 1 #Deslizou a janela -> Diminuiu o espaço para ela rodar

            while (reexecuta) & (deslz_dup >= 1):

                grupo = dup.iloc[jan_inic_ndup:jan_fin_ndup] # three rows of dataframe

                try:

                    id_row_sel = grupo['qtdAlg'].idxmin() #Retorna o índice da linha com maior valor da coluna ('min') especificada

                    #Só passa pra cá se não lançar except
                    reexecuta = False #Se selecionou um registro no comando anterior, não precisa reexecutar o while
                    #Rotula o par, retira do conjunto U (Nesse caso "pc_vetores" terá os pares retirados ao final da composição do conjunto de treinamento)
                    vetor_sel = pc_vetores.loc[id_row_sel]
                    conj_treino = conj_treino.append(vetor_sel) #Adicionando o vetor selecionado ao conjunto treino
                    orcamento -= 1

                except ValueError:

                    print("Atualizando janelas no except")
                    jan_inic_dup = jan_inic_dup + k
                    jan_fin_dup = jan_fin_dup + k

                    deslz_dup -= 1 #Deslizou a janela -> Diminuiu o espaço para ela rodar

                    print("orcamento: {0} - jan_inic_ndup: {1} - jan_fin_ndup: {2}".format(orcamento, jan_inic_ndup, jan_fin_ndup))
                    print("orcamento: {0} - jan_inic_dup: {1} - jan_fin_dup: {2}".format(orcamento, jan_inic_dup, jan_fin_dup))


                    reexecuta = True

                except:
                    print('Eita!')

            print("Possível duplicata")
            print(vetor_sel)

            #Atualização das janelas
            jan_inic_ndup = jan_inic_ndup + k
            jan_fin_ndup = jan_fin_ndup + k

            jan_inic_dup = jan_inic_dup + k
            jan_fin_dup = jan_fin_dup + k

            if(deslz_dup < 1) | (deslz_ndup < 1): #Se não houver mais espaço para deslizar as janelas
                continua = False


            cont += 1


        #Treina SVM e Decision Tree (justificar o pq) e verifica-se a média da medida de qualidade (f1, precision...)
        f1_svm, std_svm, cv_svm = geraF1(conj_treino, 'SVM')
        f1_dt, std_dt, cv_dt = geraF1(conj_treino, 'DT')

        f1_atual = (f1_svm + f1_dt)/2
        std_atual = (std_svm + std_dt)/2
        cv_atual = (cv_svm + cv_dt)/2

        ###### Aplicação das janelas deslizantes após o povoamento incial sem aleatoriedade no except

        import warnings
        warnings.filterwarnings('always')  # "error", "ignore", "always", "default", "module" or "once"

        # continua = True
        max_local = 0
        igual = 0

        print("Início")
        print("orcamento: {0}".format(orcamento))
        print("Quantidade de itens no conjunto de treinamento: {}".format(len(conj_treino)))

        while continua:

            f1_anterior = f1_atual

            reexecuta = True

            deslz_ndup -= 1 #Deslizou a janela -> Diminuiu o espaço para ela rodar

            while (reexecuta) & (deslz_ndup >= 1):
                grupo = ndup.iloc[jan_inic_ndup:jan_fin_ndup] # three rows of dataframe

                #SELEÇÃO DA POSSÍVEL NÃO-DUPLICATA
                #Seleciona-se o par com menor quantidade de pares em concordância (talvez selecionar o que teve maior concordância também?) para rotulação

                try:

                    id_row_sel = grupo['min'].idxmax() #Retorna o índice da linha com maior valor da coluna ('min') especificada

                    #Só passa pra cá se não lançar except
                    reexecuta = False #Se selecionou um registro no comando anterior, não precisa reexecutar o while
                    #Rotula o par, retira do conjunto U (Nesse caso "pc_vetores")
                    vetor_sel = pc_vetores.loc[id_row_sel]
                    conj_treino = conj_treino.append(vetor_sel) #Adicionando o vetor selecionado ao conjunto treino
                    orcamento -= 1

                except ValueError:

                    print("Atualizando janelas no except")
                    jan_inic_ndup = jan_inic_ndup + k
                    jan_fin_ndup = jan_fin_ndup + k

                    deslz_ndup -= 1 #Deslizou a janela -> Diminuiu o espaço para ela rodar

                    reexecuta = True

                except:
                    print('Eita!')

            print("Possível não-duplicata")
            print(vetor_sel)

            reexecuta = True

            #SELEÇÃO DA POSSÍVEL DUPLICATA
            #Seleciona-se o par com menor quantidade de pares em concordância (talvez selecionar o que teve maior concordância também?) para rotulação

            deslz_dup -= 1 #Deslizou a janela -> Diminuiu o espaço para ela rodar

            while (reexecuta) & (deslz_dup >= 1):

                grupo = dup.iloc[jan_inic_ndup:jan_fin_ndup] # three rows of dataframe

                try:

                    id_row_sel = grupo['qtdAlg'].idxmin() #Retorna o índice da linha com maior valor da coluna ('min') especificada

                    #Só passa pra cá se não lançar except
                    reexecuta = False
                    #Rotula o par, retira do conjunto U (Nesse caso "pc_vetores" terá os pares retirados ao final da composição do conjunto de treinamento)
                    vetor_sel = pc_vetores.loc[id_row_sel]
                    conj_treino = conj_treino.append(vetor_sel) #Adicionando o vetor selecionado ao conjunto treino
                    orcamento -= 1

                except ValueError:

                    print("Atualizando janelas no except")
                    jan_inic_dup = jan_inic_dup + k
                    jan_fin_dup = jan_fin_dup + k

                    deslz_dup -= 1 #Deslizou a janela -> Diminuiu o espaço para ela rodar

                    reexecuta = True

                except:
                    print('Eita!')

            print("Possível duplicata")
            print(vetor_sel)

            #Treina SVM e Decision Tree (justificar o pq) e verifica-se a média da medida de qualidade (f1, precision...)
            f1_svm, std_svm, cv_svm = geraF1(conj_treino, 'SVM')
            f1_dt, std_dt, cv_dt = geraF1(conj_treino, 'DT')

            f1_atual = (f1_svm + f1_dt)/2
            std_atual = (std_svm + std_dt)/2
            cv_atual = (cv_svm + cv_dt)/2 #Coeficiente de variação

            cv_atual = cv_atual*100


            if orcamento > 0 : #Se ainda tem orçamento pra gastar
                if cv_atual < 10:
                    if (f1_atual >= f1_anterior) and (igual <= 3): #Selecionam-se novos pares para rotulação
          
                        if f1_atual == f1_anterior:
                            igual += 1
                            print("igual: {0}".format(igual))
                        elif (f1_atual > f1_anterior) and (igual > 0):
                            igual = 0

                        continua = True
                    elif (f1_atual < f1_anterior):
                        max_local += 1
                        #Se entrar aqui tem que remover os últimos pares que entraram no conjunto de treinamento

                        #IMPORTANTE! Fazer com que esses devem sejam extraídos para o conjunto PM, não apenas descartados
                        conj_treino.drop(conj_treino.tail(2).index,inplace=True)

                        print("f1_atual < f1_anterior")
                        print("F1 anterior: {0} - F1 atual: {1}".format(f1_anterior, f1_atual))

                        if (max_local <= 1):
                            f1_atual = f1_anterior
                        else:
                            f1_atual = f1_anterior
                            print('O f1 passou a piorar')
                            print("F1 anterior: {0} - F1 atual: {1}".format(f1_anterior, f1_atual))
                            continua = False
                            break
                    elif (igual > 3): #Se a medida de qualidade for igual à anterior (por três vezes)
                            print('F1 convergiu!') 
                            continua = False
                            break

            else:
                continua = False
                break

            #Atualização das janelas
            jan_inic_ndup = jan_inic_ndup + k
            jan_fin_ndup = jan_fin_ndup + k

            jan_inic_dup = jan_inic_dup + k
            jan_fin_dup = jan_fin_dup + k

            if(deslz_dup < 1) | (deslz_ndup < 1): #Se não houver mais espaço para deslizar as janelas
                continua = False

            print("F1 anterior: {0} - F1 atual: {1}".format(f1_anterior, f1_atual))
            print("igual: {0}".format(igual))

        ##### Salvando os arquivos de treino e teste

        dirDest = "../../csv/conjuntosDS/treinoTeste/"
        
        abordagem = 'DS'
        
        iteracao = 1
        inspecoesManuais = orcamento_orig - orcamento
        
        duplicatas = [i for i in conj_treino.duplicata if i == True]
        duplicatas = duplicatas.count(True)
        
        nao_duplicatas = [i for i in conj_treino.duplicata if i == False]
        nao_duplicatas = nao_duplicatas.count(True)
        
        da = linhaAtual['da'].item()
        dm = duplicatas
        ndm = nao_duplicatas

        tp = float(linhaAtual['tp'].item() + dm)
        fp = float(linhaAtual['fp'].item())
        tn = float(linhaAtual['tn'].item())# + ndm) #Retirado
        fn = float(linhaAtual['fn'].item() - dm) #Adicionado
        
        precision = tp/(tp+fp)
        recall = tp/(tp+fn)
        fmeasure = 2*((precision*recall)/(precision+recall))
        
        #Adicionando valor à última linha
        estatisticas.loc[(algUtl, etapa, permutacao), ['abordagem', 'iteracao', 'inspecoesManuais',
           'precision', 'recall', 'f-measure', 'da', 'dm', 'ndm', 'tp',
           'fp', 'tn', 'fn'] ] = ([abordagem, iteracao, inspecoesManuais,
           precision, recall, fmeasure, da, dm, ndm, tp, fp, tn, fn])
        
        dirDest = "../../csv/conjuntosDS/treinoTeste/"
#         dirDest = "../../Documents/NetBeansProjects/Master-SKYAM/AS/src/csv/conjuntosDS/treinoTeste/"
#         dirDest = "./arqResult/csv/conjuntosDS/conjuntosDiverg/treinoTeste/"
        
        #algUtl = str(algUtl).replace('.0','')
        algUtl = str(algUtl)
        
        geraTrainSet(conj_treino, dirDest, 'train' + '(' + algUtl + ')' + num + '.csv')

        indicesCT = conj_treino.index.values.tolist()

        geral = pd.concat([pc_vetores,conj_treino]) #Concatenando pc_vetores e conj_treino

        #Resta para compor o conjunto teste tudo aquilo que está em pc_vetores, mas não em conj_treino
        conj_teste = geral.drop(indicesCT, axis='rows') 

        geraTestSet(conj_teste, dirDest, 'test' + '(' + algUtl + ')' + num + '.csv')
        
        
        

############################################################################################################################
#Estatísticas
############################################################################################################################

#Para voltar o dataframe ao normal
estatisticas = estatisticas.reset_index(level=['algoritmosUtilizados', 'etapa', 'permutacao'])

estatisticas = estatisticas[['abordagem', 'etapa', 'algoritmosUtilizados', 'permutacao', 'iteracao', 'inspecoesManuais', 'precision', 'recall', 'f-measure', 'da', 'dm', 'ndm', 'tp', 'fp', 'tn', 'fn']]

estatisticas[['algoritmosUtilizados', 'iteracao', 'inspecoesManuais', 'da', 'dm', 'ndm', 'tp', 'fp', 'tn', 'fn']] = \
estatisticas[['algoritmosUtilizados', 'iteracao', 'inspecoesManuais', 'da', 'dm', 'ndm', 'tp', 'fp', 'tn', 'fn']].astype(int)

dirEst = "../../csv/"

# Diretório para Windows
# dirEst = "C:\Users\Diego\Documents\NetBeansProjects\Master-SKYAM\AS\src\csv\\"
# dirEst = "../../Documents/NetBeansProjects/Master-SKYAM/AS/src/csv/"


# Diretório para Linux
# dirEst = "./arqResult/csv/"

estatisticas.to_csv(dirEst+'estatisticaInicialDS2-DgArj.csv', sep=';', index=False)

  num = re.sub('diverg.*\)', r'', arq) #Alterar para fazer a substituição de tudo em uma linha só
  algUtl = re.sub('diverg.*\(', r'', arq) #Alterar para fazer a substituição de tudo em uma linha só
  algUtl = re.sub('\).*', r'', algUtl) #Alterar para fazer a substituição de tudo em uma linha só


##################################################################
Analisando o arquivo: diverg(10)1000_NEW.csv
##################################################################
cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.666667
track02      0.500000
track03      0.600000
track10      0.500000
track11      0.615385
Name: (100558, 105098), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       0.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (1858, 9637), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.304348
track02      0.571429
track03      

F1 anterior: 0.926984126984127 - F1 atual: 0.9403174603174603
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.545455
track02      0.785714
track03      0.500000
track10      0.444444
track11      0.692308
Name: (100558, 105076), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.372093
track03      0.833333
track10      1.000000
track11      0.838710
Name: (7542, 8823), dtype: float64
f1_atual < f1_anterior
F1 anterior: 0.9403174603174603 - F1 atual: 0.9301587301587302
F1 anterior: 0.9403174603174603 - F1 atual: 0.9403174603174603
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.727273
track02      0.571429
track03      0.750000
track10      0.166667
track11      0.812500
Name: (100856, 103398), dtype: float64
Possível duplicata
duplicata    0.000000
title        0.495000
ar

Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.233333
track02      0.483871
track03      0.578947
track10      0.454545
track11      0.714286
Name: (100717, 106511), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.966667
artist       1.000000
track01      0.800000
track02      0.928571
track03      0.916667
track10      0.714286
track11      0.866667
Name: (6944, 7731), dtype: float64
F1 anterior: 0.9457142857142857 - F1 atual: 0.9488888888888889
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.346154
track02      0.521739
track03      0.764706
track10      0.769231
track11      0.454545
Name: (104117, 104872), dtype: float64
Possível duplicata
duplicata    0.000000
title        0.845238
artist       1.000000
track01      0.849057
track02      0.851064
track03      0.683333

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.000000
title        1.000000
artist       0.694444
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (3086, 3215), dtype: float64
cont < 20: True - continua: True
orcamento: 68 - jan_inic_dup: 48 - jan_fin_ndup: 51
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.041667
track02      0.500000
track03      0.550000
track10      0.875000
track11      0.714286
Name: (108429, 2004), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.629630
track02      0.696970
track03      0.607143
track10      0.615385
track11      0.192308
Name: (8274, 8630), dtype: float64
cont < 20: True - continua: True
orcamento: 66 - jan_inic_dup: 51 - jan_fin_ndup: 54
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title    

Possível duplicata
duplicata    0.000000
title        0.466667
artist       1.000000
track01      0.916667
track02      0.842105
track03      0.882353
track10      0.171429
track11      0.090909
Name: (103350, 104042), dtype: float64
F1 anterior: 0.9064285714285714 - F1 atual: 0.8214285714285714
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       0.000000
track01      0.360000
track02      0.833333
track03      0.772727
track10      0.785714
track11      0.227273
Name: (104289, 104872), dtype: float64
Possível duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.888889
track02      0.875000
track03      0.375000
track10      0.666667
track11      0.533333
Name: (103398, 105098), dtype: float64
F1 anterior: 0.8214285714285714 - F1 atual: 0.838888888888889
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.300000
track02      0.785714
track03      0.818

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.636364
track02      0.727273
track03      0.388889
track10      0.625000
track11      0.333333
Name: (107056, 107734), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       0.777778
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (10314, 8640), dtype: float64
cont < 20: True - continua: True
orcamento: 74 - jan_inic_dup: 39 - jan_fin_ndup: 42
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.777778
track02      0.727273
track03      0.875000
track10      0.833333
track11      0.333333
Name: (102477, 103398), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.955556
artist       0.694444
track01      1.000000
track02      1.000000
track03      0.968750
track10      1.000000
track11      0.95

  res = shell.run_cell(code, store_history=store_history, silent=silent)


duplicata    1.000000
title        0.975000
artist       1.000000
track01      1.000000
track02      1.000000
track03      0.823529
track10      0.785714
track11      1.000000
Name: (6740, 9193), dtype: float64
cont < 20: True - continua: True
orcamento: 88 - jan_inic_dup: 18 - jan_fin_ndup: 21
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.583333
track02      0.722222
track03      0.888889
track10      0.882353
track11      0.785714
Name: (103056, 105956), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.950000
artist       1.000000
track01      0.937500
track02      0.947368
track03      1.000000
track10      1.000000
track11      1.000000
Name: (2427, 8640), dtype: float64
cont < 20: True - continua: True
orcamento: 86 - jan_inic_dup: 21 - jan_fin_ndup: 24
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
arti

F1 anterior: 0.9003174603174602 - F1 atual: 0.9117460317460317
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.789474
track02      0.833333
track03      0.173913
track10      0.866667
track11      0.714286
Name: (103424, 104842), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (4441, 7769), dtype: float64
F1 anterior: 0.9117460317460317 - F1 atual: 0.8956709956709956
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      1.000000
track02      0.714286
track03      0.600000
track10      0.647059
track11      0.529412
Name: (104099, 109749), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000

  res = shell.run_cell(code, store_history=store_history, silent=silent)


duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.944444
track02      1.000000
track03      0.900000
track10      0.937500
track11      0.866667
Name: (10061, 3517), dtype: float64
cont < 20: True - continua: True
orcamento: 90 - jan_inic_dup: 15 - jan_fin_ndup: 18
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.416667
track02      0.357143
track03      0.350000
track10      0.411765
track11      0.625000
Name: (104099, 2004), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.975000
artist       1.000000
track01      1.000000
track02      1.000000
track03      0.823529
track10      0.785714
track11      1.000000
Name: (6740, 9193), dtype: float64
cont < 20: True - continua: True
orcamento: 88 - jan_inic_dup: 18 - jan_fin_ndup: 21
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artis

igual: 1
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.066667
track02      0.650000
track03      0.578947
track10      0.500000
track11      0.285714
Name: (102514, 108429), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (4441, 7769), dtype: float64
igual: 2
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.250000
track02      0.714286
track03      0.500000
track10      0.388889
track11      0.812500
Name: (100558, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381
track11      1.000000

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.000000
title        0.933333
artist       1.000000
track01      0.942857
track02      1.000000
track03      0.916667
track10      0.952381
track11      0.913043
Name: (8890, 9063), dtype: float64
cont < 20: True - continua: True
orcamento: 84 - jan_inic_dup: 24 - jan_fin_ndup: 27
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.807692
track02      0.434783
track03      0.700000
track10      0.538462
track11      0.687500
Name: (104117, 2004), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.928571
track03      0.916667
track10      0.857143
track11      0.933333
Name: (6944, 9868), dtype: float64
cont < 20: True - continua: True
orcamento: 82 - jan_inic_dup: 27 - jan_fin_ndup: 30
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title    

F1 anterior: 1.0 - F1 atual: 0.9380952380952381
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.304348
track02      0.714286
track03      0.625000
track10      0.294118
track11      0.238095
Name: (102477, 104310), dtype: float64
Possível duplicata
duplicata    0.000000
title        0.845238
artist       1.000000
track01      0.849057
track02      0.851064
track03      0.683333
track10      0.418182
track11      0.519231
Name: (109296, 109299), dtype: float64
f1_atual < f1_anterior
F1 anterior: 0.9380952380952381 - F1 atual: 0.896031746031746
F1 anterior: 0.9380952380952381 - F1 atual: 0.9380952380952381
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.458333
track02      0.909091
track03      0.250000
track10      0.875000
track11      0.562500
Name: (105076, 2004), dtype: float64
Possível duplicata
duplicata    1.00
title        0.52
artist       1.00
track

  res = shell.run_cell(code, store_history=store_history, silent=silent)


##################################################################
Analisando o arquivo: diverg(10)106_NEW.csv
##################################################################
cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.807692
track02      0.434783
track03      0.700000
track10      0.538462
track11      0.687500
Name: (104117, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.30000

Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7134, 9695), dtype: float64
igual: 1
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.458333
track02      0.714286
track03      0.600000
track10      0.194444
track11      0.937500
Name: (100856, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381
track11      1.000000
Name: (2026, 4601), dtype: float64
igual: 2
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.315789
track02      0.866667
track03      0.416667
track10      0.333333
track11      0.270833
Name: (100558, 108876), dtype: float64
Poss

  res = shell.run_cell(code, store_history=store_history, silent=silent)


cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.500000
track02      0.454545
track03      0.818182
track10      0.000000
track11      0.000000
Name: (105349, 105518), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.416667
track02      0.909091
track03      0.500000
track10      0.636364
track11      0.888889
Name: (105349, 2004), dtype: float64
Possível duplicata
duplicata    1.0
title        

igual: 1
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.346154
track02      0.521739
track03      0.764706
track10      0.769231
track11      0.454545
Name: (104117, 104872), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (4441, 7769), dtype: float64
igual: 2
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.666667
track02      0.285714
track03      0.555556
track10      0.259259
track11      0.250000
Name: (103056, 103830), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381
track11      1.0000

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        0.975000
artist       1.000000
track01      1.000000
track02      1.000000
track03      0.823529
track10      0.785714
track11      1.000000
Name: (6740, 9193), dtype: float64
cont < 20: True - continua: True
orcamento: 88 - jan_inic_dup: 18 - jan_fin_ndup: 21
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.625000
artist       1.000000
track01      0.216667
track02      0.160000
track03      0.150943
track10      0.093750
track11      0.203125
Name: (108797, 108932), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.950000
artist       1.000000
track01      0.937500
track02      0.947368
track03      1.000000
track10      1.000000
track11      1.000000
Name: (2427, 8640), dtype: float64
cont < 20: True - continua: True
orcamento: 86 - jan_inic_dup: 21 - jan_fin_ndup: 24
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.226349
art

F1 anterior: 0.9428571428571428 - F1 atual: 0.9571428571428571
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.750000
artist       1.000000
track01      0.222222
track02      0.095238
track03      0.166667
track10      0.138889
track11      0.277778
Name: (100331, 100785), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381
track11      1.000000
Name: (2026, 4601), dtype: float64
igual: 1
F1 anterior: 0.9571428571428571 - F1 atual: 0.9571428571428571
igual: 1
Possível não-duplicata
duplicata    0.000000
title        0.600000
artist       0.944444
track01      0.382353
track02      0.022727
track03      0.303030
track10      0.396552
track11      0.147059
Name: (109199, 109365), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.566667
artist       1.000000
track01      1.000000
track02      1.000000
track03     

  res = shell.run_cell(code, store_history=store_history, silent=silent)


##################################################################
Analisando o arquivo: diverg(10)109_NEW.csv
##################################################################
cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.807692
track02      0.434783
track03      0.700000
track10      0.538462
track11      0.687500
Name: (104117, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.23333

Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.500000
track02      0.451613
track03      0.631579
track10      0.714286
track11      0.666667
Name: (106511, 107620), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.629630
track02      0.696970
track03      0.607143
track10      0.615385
track11      0.192308
Name: (8630, 8805), dtype: float64
F1 anterior: 0.8597619047619047 - F1 atual: 0.8984126984126983
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.400000
track02      0.736842
track03      0.529412
track10      0.750000
track11      0.366667
Name: (100054, 101555), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.750000
artist       1.000000
track01      0.777778
track02      1.000000
track03      1.000000

  res = shell.run_cell(code, store_history=store_history, silent=silent)


##################################################################
Analisando o arquivo: diverg(10)10_NEW.csv
##################################################################
cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.500000
track02      0.454545
track03      0.818182
track10      0.000000
track11      0.000000
Name: (105349, 105518), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.4166

f1_atual < f1_anterior
F1 anterior: 0.9514285714285715 - F1 atual: 0.9434920634920635
F1 anterior: 0.9514285714285715 - F1 atual: 0.9514285714285715
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.250000
track02      0.714286
track03      0.500000
track10      0.388889
track11      0.812500
Name: (100558, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.420000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      0.947368
Name: (11531, 3134), dtype: float64
F1 anterior: 0.9514285714285715 - F1 atual: 0.8977777777777778
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.304348
track02      0.714286
track03      0.625000
track10      0.294118
track11      0.238095
Name: (102477, 104310), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.436111
ar

  res = shell.run_cell(code, store_history=store_history, silent=silent)


cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.512222
artist       1.000000
track01      0.086957
track02      0.190476
track03      0.107143
track10      0.225806
track11      0.204082
Name: (101556, 105525), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.518182
artist       1.000000
track01      0.179487
track02      0.244898
track03      0.111111
track10      0.181818
track11      0.103448
Name: (104862, 107811), dtype: float64
Possível duplicata
duplicata    1.0
title      

  'precision', 'predicted', average, warn_for)
  res = shell.run_cell(code, store_history=store_history, silent=silent)


F1 anterior: 0.8014285714285714 - F1 atual: 0.9117460317460317
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.700000
artist       1.000000
track01      0.258065
track02      0.205128
track03      0.175439
track10      0.181818
track11      0.213115
Name: (109420, 109429), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.879167
artist       1.000000
track01      0.750000
track02      0.750000
track03      0.735294
track10      0.733333
track11      0.722222
Name: (1818, 7075), dtype: float64
F1 anterior: 0.9117460317460317 - F1 atual: 0.9177777777777778
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.733333
artist       1.000000
track01      0.233333
track02      0.161290
track03      0.156863
track10      0.200000
track11      0.241379
Name: (103155, 106396), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.420000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000

igual: 1
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.066667
track02      0.650000
track03      0.578947
track10      0.500000
track11      0.285714
Name: (102514, 108429), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7192, 7778), dtype: float64
igual: 2
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.250000
track02      0.714286
track03      0.500000
track10      0.388889
track11      0.812500
Name: (100558, 2004), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381
track11      1.000000
Name: (2026, 4601), dtype: float64
f1_a

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        0.880000
artist       1.000000
track01      1.000000
track02      0.928571
track03      1.000000
track10      0.888889
track11      1.000000
Name: (6756, 8537), dtype: float64
cont < 20: True - continua: True
orcamento: 90 - jan_inic_dup: 15 - jan_fin_ndup: 18
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.416667
track02      0.357143
track03      0.350000
track10      0.411765
track11      0.625000
Name: (104099, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.955556
artist       0.694444
track01      1.000000
track02      1.000000
track03      0.968750
track10      1.000000
track11      0.950000
Name: (3096, 3123), dtype: float64
cont < 20: True - continua: True
orcamento: 88 - jan_inic_dup: 18 - jan_fin_ndup: 21
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artis

F1 anterior: 0.9046031746031746 - F1 atual: 0.9077777777777778
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.666667
track02      0.285714
track03      0.555556
track10      0.259259
track11      0.250000
Name: (103056, 103830), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.750000
artist       1.000000
track01      0.777778
track02      1.000000
track03      1.000000
track10      1.000000
track11      0.176471
Name: (10383, 7653), dtype: float64
F1 anterior: 0.9077777777777778 - F1 atual: 0.9055555555555554
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.800000
track02      0.666667
track03      0.380952
track10      0.550000
track11      0.428571
Name: (101021, 102831), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.433333
artist       1.000000
track01      1.000000
track02      1.000000
track03      0.82352

  res = shell.run_cell(code, store_history=store_history, silent=silent)


duplicata    1.000000
title        0.900000
artist       1.000000
track01      1.000000
track02      0.909091
track03      0.600000
track10      0.875000
track11      0.958333
Name: (4586, 7633), dtype: float64
cont < 20: True - continua: True
orcamento: 86 - jan_inic_dup: 21 - jan_fin_ndup: 24
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.370370
track02      0.321429
track03      0.600000
track10      0.650000
track11      0.642857
Name: (101021, 104099), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.933333
artist       1.000000
track01      0.942857
track02      1.000000
track03      0.916667
track10      0.952381
track11      0.913043
Name: (6802, 8890), dtype: float64
cont < 20: True - continua: True
orcamento: 84 - jan_inic_dup: 24 - jan_fin_ndup: 27
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
arti

Possível não-duplicata
duplicata    0.000000
title        0.518182
artist       1.000000
track01      0.179487
track02      0.244898
track03      0.111111
track10      0.181818
track11      0.103448
Name: (104862, 107811), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.966667
artist       1.000000
track01      0.800000
track02      0.928571
track03      0.916667
track10      0.714286
track11      0.866667
Name: (6944, 7731), dtype: float64
f1_atual < f1_anterior
F1 anterior: 0.9657142857142857 - F1 atual: 0.9546031746031746
O f1 passou a piorar
F1 anterior: 0.9657142857142857 - F1 atual: 0.9657142857142857
##################################################################
Analisando o arquivo: diverg(10)114_NEW.csv
##################################################################
cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        0.666667
artist       1.000000
track01      0.933333
track02      1.000000
track03      0.947368
track10      1.000000
track11      0.947368
Name: (8370, 9876), dtype: float64
cont < 20: True - continua: True
orcamento: 62 - jan_inic_dup: 57 - jan_fin_ndup: 60
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.809524
track02      1.000000
track03      0.750000
track10      0.000000
track11      0.000000
Name: (105518, 109651), dtype: float64
Possível duplicata
duplicata    1.00000
title        1.00000
artist       1.00000
track01      1.00000
track02      0.52381
track03      1.00000
track10      1.00000
track11      1.00000
Name: (1838, 7934), dtype: float64
Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.818182
track02      0.83333

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.909091
track03      0.705882
track10      1.000000
track11      1.000000
Name: (7363, 8118), dtype: float64
Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        0.637143
artist       1.000000
track01      0.250000
track02      0.133333
track03      0.205882
track10      0.187500
track11      0.176471
Name: (100655, 107814), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7134, 9695), dtype: float64
F1 anterior: 0.946031746031746 - F1 atual: 0.9746031746031746
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       1.000000
track01      0.224138
track02      0.194444
track03      0.228571
track10      0.173077
track11      0.138889
Name: (106868, 108

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.769231
track02      0.217391
track03      0.785714
track10      0.000000
track11      0.000000
Name: (104117, 105518), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.923077
track03      0.842105
track10      0.823529
track11      1.000000
Name: (7859, 9798), dtype: float64
cont < 20: True - continua: True
orcamento: 92 - jan_inic_dup: 12 - jan_fin_ndup: 15
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.555556
track02      0.318182
track03      0.333333
track10      0.200000
track11      0.666667
Name: (101480, 104418), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.944444
track02      1.000000
track03      0.900000
track10      0.937500
track11      0.866

igual: 1
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.333333
track02      0.600000
track03      0.800000
track10      0.777778
track11      0.812500
Name: (102670, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.909091
track03      0.705882
track10      1.000000
track11      1.000000
Name: (7363, 8118), dtype: float64
igual: 2
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.769231
track02      0.521739
track03      0.785714
track10      0.928571
track11      1.000000
Name: (103997, 104117), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.416667
track02      0.357143
track03      0.350000
track10      0.411765
track11      0.625000
Name: (104099, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.975000
artist       1.000000
track01      1.000000
track02      1.000000
track03      0.823529
track10      0.785714
track11      1.000000
Name: (6740, 9193), dtype: float64
cont < 20: True - continua: True
orcamento: 88 - jan_inic_dup: 18 - jan_fin_ndup: 21
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.461538
track02      0.608696
track03      0.684211
track10      0.307692
track11      0.545455
Name: (102514, 104117), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (4291, 490

igual: 2
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.250000
track02      0.714286
track03      0.500000
track10      0.388889
track11      0.812500
Name: (100558, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (4441, 7769), dtype: float64
f1_atual < f1_anterior
F1 anterior: 1.0 - F1 atual: 0.9514285714285715
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.304348
track02      0.714286
track03      0.625000
track10      0.294118
track11      0.238095
Name: (102477, 104310), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
tra

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.928571
track03      0.916667
track10      0.857143
track11      0.933333
Name: (6944, 9868), dtype: float64
cont < 20: True - continua: True
orcamento: 78 - jan_inic_dup: 33 - jan_fin_ndup: 36
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.444444
track02      0.642857
track03      0.421053
track10      0.200000
track11      0.555556
Name: (101021, 102514), dtype: float64
Possível duplicata
duplicata    1.00
title        1.00
artist       1.00
track01      1.00
track02      0.75
track03      1.00
track10      1.00
track11      1.00
Name: (6382, 7920), dtype: float64
cont < 20: True - continua: True
orcamento: 76 - jan_inic_dup: 36 - jan_fin_ndup: 39
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.000000
title        0.966667
artist       1.000000
track01      0.800000
track02      0.928571
track03      0.916667
track10      0.714286
track11      0.866667
Name: (6944, 7731), dtype: float64
cont < 20: True - continua: True
orcamento: 68 - jan_inic_dup: 48 - jan_fin_ndup: 51
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.833333
track02      0.260870
track03      0.666667
track10      0.846154
track11      0.523810
Name: (102831, 104117), dtype: float64
Possível duplicata
duplicata    0.000000
title        0.845238
artist       1.000000
track01      0.849057
track02      0.851064
track03      0.683333
track10      0.418182
track11      0.519231
Name: (109296, 109299), dtype: float64
cont < 20: True - continua: True
orcamento: 66 - jan_inic_dup: 51 - jan_fin_ndup: 54
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
tit

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.703704
track02      0.750000
track03      0.727273
track10      0.700000
track11      0.818182
Name: (101021, 103997), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       0.777778
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (10314, 8640), dtype: float64
cont < 20: True - continua: True
orcamento: 72 - jan_inic_dup: 42 - jan_fin_ndup: 45
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.240000
track02      0.714286
track03      0.454545
track10      0.777778
track11      0.590909
Name: (100558, 104289), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.777778
track02      0.906250
track03      0.769231
track10      0.884615
track11      0.86

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.000000
title        0.666667
artist       1.000000
track01      0.933333
track02      1.000000
track03      0.947368
track10      1.000000
track11      0.947368
Name: (8370, 9876), dtype: float64
cont < 20: True - continua: True
orcamento: 62 - jan_inic_dup: 57 - jan_fin_ndup: 60
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.636364
track02      0.818182
track03      0.437500
track10      0.312500
track11      0.833333
Name: (102477, 107734), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.909091
track03      0.705882
track10      1.000000
track11      1.000000
Name: (7363, 8118), dtype: float64
Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        0.637143
artist       1.000000
track01      0.2

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.300000
track02      0.466667
track03      0.500000
track10      0.818182
track11      0.421053
Name: (100717, 105098), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       0.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (2004, 3710), dtype: float64
cont < 20: True - continua: True
orcamento: 96 - jan_inic_dup: 6 - jan_fin_ndup: 9
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.250000
track02      0.714286
track03      0.500000
track10      0.388889
track11      0.812500
Name: (100558, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.933333
track02      1.000000
track03      1.000000
track10      0.900000
track11      1.000000
Name: (10240, 7367), dtype: float64
cont

Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.666667
track02      0.571429
track03      0.562500
track10      0.333333
track11      0.866667
Name: (100558, 103398), dtype: float64
Possível duplicata
duplicata    1.00000
title        1.00000
artist       1.00000
track01      1.00000
track02      0.52381
track03      1.00000
track10      1.00000
track11      1.00000
Name: (1838, 7934), dtype: float64
igual: 1
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.555556
track02      0.428571
track03      0.500000
track10      0.700000
track11      0.642857
Name: (101021, 103424), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (4441, 7769), dtype: float64
igual: 2
F1 anterior:

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.416667
track02      0.909091
track03      0.500000
track10      0.636364
track11      0.888889
Name: (105349, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 96 - jan_inic_dup: 6 - jan_fin_ndup: 9
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.461538
track02      0.409091
track03      0.833333
track10      0.263158
track11      0.687500
Name: (101480, 104396), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.933333
track02      1.000000
track03      1.000000
track10      0.900000
track11      1.000000


Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.333333
track02      0.600000
track03      0.800000
track10      0.777778
track11      0.812500
Name: (102670, 2004), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (2252, 4733), dtype: float64
F1 anterior: 0.9546031746031746 - F1 atual: 0.98
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.769231
track02      0.521739
track03      0.785714
track10      0.928571
track11      1.000000
Name: (103997, 104117), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381
track11      1.000000
Name: (2026, 4601), dtype: float64
f1_atual < f1_anterior
F1 anterior: 0.98 - F1 at

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        0.666667
artist       0.000000
track01      0.300000
track02      0.466667
track03      0.529412
track10      0.722222
track11      0.684211
Name: (100521, 100558), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.777778
track02      0.906250
track03      0.769231
track10      0.884615
track11      0.863636
Name: (1884, 9542), dtype: float64
cont < 20: True - continua: True
orcamento: 88 - jan_inic_dup: 18 - jan_fin_ndup: 21
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.375000
track02      0.384615
track03      0.772727
track10      0.225806
track11      0.312500
Name: (105066, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.666667
artist       1.000000
track01      0.933333
track02      1.000000
track03      0.947368
track10      1.000000
track11      0.94736

F1 anterior: 0.8838095238095238 - F1 atual: 0.8838095238095238
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.222222
track02      0.642857
track03      0.800000
track10      0.750000
track11      0.692308
Name: (100558, 101021), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.879167
artist       1.000000
track01      0.750000
track02      0.750000
track03      0.735294
track10      0.733333
track11      0.722222
Name: (1818, 7075), dtype: float64
F1 anterior: 0.8838095238095238 - F1 atual: 0.9434920634920635
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.461538
track02      0.608696
track03      0.642857
track10      0.705882
track11      0.785714
Name: (103056, 104117), dtype: float64
Possível duplicata
duplicata    0.000000
title        1.000000
artist       0.555556
track01      1.000000
track02      0.928571
track03      1.000000

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.500000
track02      0.285714
track03      0.750000
track10      0.612903
track11      0.200000
Name: (4676, 5746), dtype: float64
Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       0.000000
track01      0.350000
track02      0.612903
track03      0.631579
track10      0.500000
track11      0.633333
Name: (101555, 106511), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.879167
artist       1.000000
track01      0.750000
track02      0.750000
track03      0.735294
track10      0.733333
track11      0.722222
Name: (1818, 7075), dtype: float64
F1 anterior: 0.79 - F1 atual: 0.8464285714285714
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.631579
track02      0.583333
track03      0.391304
track10      0.82

Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       0.000000
track01      0.650000
track02      0.600000
track03      0.705882
track10      0.727273
track11      0.850000
Name: (100521, 104533), dtype: float64
Possível duplicata
duplicata    0.000000
title        0.186111
artist       1.000000
track01      1.000000
track02      1.000000
track03      0.333333
track10      0.111111
track11      0.238095
Name: (105016, 107434), dtype: float64
F1 anterior: 0.8032828282828283 - F1 atual: 0.8329797979797979
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.652083
artist       1.000000
track01      0.173913
track02      0.187500
track03      0.166667
track10      0.166667
track11      0.062500
Name: (101107, 109637), dtype: float64
Possível duplicata
duplicata    0.000000
title        1.000000
artist       0.555556
track01      0.941176
track02      0.956522
track03      0.958333
track10      0.937500
track11      0.979167
Name: (102666, 105709),

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.285714
track02      0.357143
track03      0.833333
track10      0.388889
track11      0.384615
Name: (100558, 109651), dtype: float64
Possível duplicata
duplicata    1.00
title        0.52
artist       1.00
track01      1.00
track02      1.00
track03      1.00
track10      1.00
track11      1.00
Name: (5780, 5782), dtype: float64
cont < 20: True - continua: True
orcamento: 74 - jan_inic_dup: 39 - jan_fin_ndup: 42
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.740741
track02      0.555556
track03      0.666667
track10      0.350000
track11      0.555556
Name: (101021, 109651), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.950000
artist       0.777778
track01      0.937500
track02      0.947368
track03      1.000000
track10      1.000000
track11      1.000000
Name: (10314, 2427), dtype: 

  res = shell.run_cell(code, store_history=store_history, silent=silent)


##################################################################
Analisando o arquivo: diverg(10)126_NEW.csv
##################################################################
cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.512222
artist       1.000000
track01      0.086957
track02      0.190476
track03      0.107143
track10      0.225806
track11      0.204082
Name: (101556, 105525), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       0.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (1858, 9637), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.518182
artist       1.000000
track01      0.179487
track02      0.244898
track03      0

igual: 1
F1 anterior: 0.9888888888888889 - F1 atual: 0.9888888888888889
igual: 1
Possível não-duplicata
duplicata    0.000000
title        0.700000
artist       1.000000
track01      0.258065
track02      0.205128
track03      0.175439
track10      0.181818
track11      0.213115
Name: (109420, 109429), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.566667
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (2890, 2891), dtype: float64
igual: 2
F1 anterior: 0.9888888888888889 - F1 atual: 0.9888888888888889
igual: 2
Possível não-duplicata
duplicata    0.000000
title        0.733333
artist       1.000000
track01      0.233333
track02      0.161290
track03      0.156863
track10      0.200000
track11      0.241379
Name: (103155, 106396), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.884615
track02      0.727273
tra

  res = shell.run_cell(code, store_history=store_history, silent=silent)


cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.500000
track02      0.454545
track03      0.818182
track10      0.000000
track11      0.000000
Name: (105349, 105518), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7008, 8831), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.416667
track02      0.909091
track03      0.500000
track10      0.636364
track11      0.888889
Name: (105349, 3710), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       0.0
track01      1.0
tr

igual: 1
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.066667
track02      0.650000
track03      0.578947
track10      0.500000
track11      0.285714
Name: (102514, 108429), dtype: float64
Possível duplicata
duplicata    0.000000
title        0.845238
artist       1.000000
track01      0.849057
track02      0.851064
track03      0.683333
track10      0.418182
track11      0.519231
Name: (109296, 109299), dtype: float64
f1_atual < f1_anterior
F1 anterior: 1.0 - F1 atual: 0.9444444444444444
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.250000
track02      0.714286
track03      0.500000
track10      0.388889
track11      0.812500
Name: (100558, 2004), dtype: float64
Possível duplicata
duplicata    1.00
title        0.52
artist       1.00
track01      1.00
track02      1.00
track03      1.00
t

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.500000
track02      0.454545
track03      0.818182
track10      0.000000
track11      0.000000
Name: (105349, 105518), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.416667
track02      0.909091
track03      0.500000
track10      0.636364
track11      0.888889
Name: (105349, 2004), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       0.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (1858, 9637)

igual: 2
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.250000
track02      0.714286
track03      0.500000
track10      0.388889
track11      0.812500
Name: (100558, 2004), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381
track11      1.000000
Name: (2026, 4601), dtype: float64
f1_atual < f1_anterior
F1 anterior: 1.0 - F1 atual: 0.9571428571428572
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.304348
track02      0.714286
track03      0.625000
track10      0.294118
track11      0.238095
Name: (102477, 104310), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.566667
artist       1.000000
track01      1.000000
track02      1.000000
tra

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.928571
track03      0.916667
track10      0.857143
track11      0.933333
Name: (6944, 9868), dtype: float64
cont < 20: True - continua: True
orcamento: 78 - jan_inic_dup: 33 - jan_fin_ndup: 36
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.714286
track02      0.304348
track03      0.533333
track10      0.833333
track11      0.529412
Name: (100446, 101502), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.869565
track02      0.875000
track03      0.866667
track10      0.888889
track11      0.900000
Name: (3670, 7515), dtype: float64
cont < 20: True - continua: True
orcamento: 76 - jan_inic_dup: 36 - jan_fin_ndup: 39
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
art

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       0.000000
track01      0.350000
track02      0.612903
track03      0.631579
track10      0.500000
track11      0.633333
Name: (101555, 106511), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.666667
artist       1.000000
track01      0.933333
track02      1.000000
track03      0.947368
track10      1.000000
track11      0.947368
Name: (8370, 9876), dtype: float64
cont < 20: True - continua: True
orcamento: 62 - jan_inic_dup: 57 - jan_fin_ndup: 60
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.631579
track02      0.583333
track03      0.391304
track10      0.823529
track11      0.642857
Name: (103056, 104842), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7192, 7

  res = shell.run_cell(code, store_history=store_history, silent=silent)


cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.800000
artist       0.000000
track01      0.375000
track02      0.571429
track03      0.333333
track10      0.266667
track11      0.333333
Name: (102987, 107779), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       1.000000
track01      0.153846
track02      0.210526
track03      0.162162
track10      0.000000
track11      0.000000
Name: (104145, 109608), dtype: float64
Possível duplicata
duplicata    1.0
title      

F1 anterior: 0.9292063492063493 - F1 atual: 0.9349206349206349
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.983333
artist       0.555556
track01      1.000000
track02      0.846154
track03      0.956522
track10      0.833333
track11      0.944444
Name: (104097, 109733), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381
track11      1.000000
Name: (2026, 4601), dtype: float64
F1 anterior: 0.9349206349206349 - F1 atual: 0.9444444444444444
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.640476
artist       1.000000
track01      0.176471
track02      0.156250
track03      0.200000
track10      0.157895
track11      0.000000
Name: (103663, 103676), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.566667
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.933333
track03      1.000000
track10      1.000000
track11      1.000000
Name: (8368, 9076), dtype: float64
cont < 20: True - continua: True
orcamento: 84 - jan_inic_dup: 24 - jan_fin_ndup: 27
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.807692
track02      0.434783
track03      0.700000
track10      0.538462
track11      0.687500
Name: (104117, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.900000
artist       1.000000
track01      1.000000
track02      0.909091
track03      0.600000
track10      0.875000
track11      0.958333
Name: (4586, 7633), dtype: float64
cont < 20: True - continua: True
orcamento: 82 - jan_inic_dup: 27 - jan_fin_ndup: 30
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artis

F1 anterior: 0.8790476190476191 - F1 atual: 0.9488888888888889
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.250000
track02      0.714286
track03      0.500000
track10      0.388889
track11      0.812500
Name: (100558, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (4441, 7769), dtype: float64
igual: 1
F1 anterior: 0.9488888888888889 - F1 atual: 0.9488888888888889
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.304348
track02      0.714286
track03      0.625000
track10      0.294118
track11      0.238095
Name: (102477, 104310), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.866667
track03      1.000000
track10      1.000000
track11      1.000000
Name: (5477, 8368), dtype: float64
cont < 20: True - continua: True
orcamento: 92 - jan_inic_dup: 12 - jan_fin_ndup: 15
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.555556
track02      0.318182
track03      0.333333
track10      0.200000
track11      0.666667
Name: (101480, 104418), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.975000
artist       1.000000
track01      1.000000
track02      1.000000
track03      0.823529
track10      0.785714
track11      1.000000
Name: (6740, 9193), dtype: float64
cont < 20: True - continua: True
orcamento: 90 - jan_inic_dup: 15 - jan_fin_ndup: 18
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title  

Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.884615
track02      0.727273
track03      0.833333
track10      0.750000
track11      0.769231
Name: (4441, 8151), dtype: float64
igual: 1
F1 anterior: 0.9346031746031745 - F1 atual: 0.9346031746031745
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.769231
track02      0.521739
track03      0.785714
track10      0.928571
track11      1.000000
Name: (103997, 104117), dtype: float64
Possível duplicata
duplicata    0.000000
title        0.845238
artist       1.000000
track01      0.849057
track02      0.851064
track03      0.683333
track10      0.418182
track11      0.519231
Name: (109296, 109299), dtype: float64
f1_atual < f1_anterior
F1 anterior: 0.9346031746031745 - F1 atual: 0.9034920634920635
F1 anterior: 0.9346031746031745 - F1 atual: 0.9346031746031745
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.0
title        1.0
artist       0.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (1858, 9637), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.416667
track02      0.909091
track03      0.500000
track10      0.636364
track11      0.888889
Name: (105349, 2004), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.979167
track03      0.973684
track10      0.357143
track11      0.535211
Name: (3159, 9969), dtype: float64
cont < 20: True - continua: True
orcamento: 96 - jan_inic_dup: 6 - jan_fin_ndup: 9
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01  

f1_atual < f1_anterior
F1 anterior: 0.9514285714285714 - F1 atual: 0.9314285714285714
F1 anterior: 0.9514285714285714 - F1 atual: 0.9514285714285714
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.066667
track02      0.650000
track03      0.578947
track10      0.500000
track11      0.285714
Name: (102514, 108429), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (4441, 7769), dtype: float64
f1_atual < f1_anterior
F1 anterior: 0.9514285714285714 - F1 atual: 0.9314285714285714
O f1 passou a piorar
F1 anterior: 0.9514285714285714 - F1 atual: 0.9514285714285714
##################################################################
Analisando o arquivo: diverg(10)134_NEW.csv
##################################################################
cont < 20: True - continua

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.300000
track02      0.466667
track03      0.500000
track10      0.818182
track11      0.421053
Name: (100717, 105098), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.980952
artist       1.000000
track01      0.937500
track02      0.952381
track03      0.720000
track10      0.677419
track11      0.941176
Name: (7980, 9790), dtype: float64
cont < 20: True - continua: True
orcamento: 66 - jan_inic_dup: 51 - jan_fin_ndup: 54
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.200000
track02      0.428571
track03      0.476190
track10      0.611111
track11      0.619048
Name: (100558, 102831), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.894737
track02      0.782609
track03      0.903226
track10      0.800000
track11      0.866

  res = shell.run_cell(code, store_history=store_history, silent=silent)


##################################################################
Analisando o arquivo: diverg(10)135_NEW.csv
##################################################################
cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.666667
track02      0.500000
track03      0.600000
track10      0.500000
track11      0.615385
Name: (100558, 105098), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.304

F1 anterior: 0.8796031746031746 - F1 atual: 0.9464285714285715
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       1.000000
track01      0.129032
track02      0.052632
track03      0.050000
track10      0.533333
track11      0.344262
Name: (104009, 109608), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.909091
track03      0.705882
track10      1.000000
track11      1.000000
Name: (7363, 8118), dtype: float64
f1_atual < f1_anterior
F1 anterior: 0.9464285714285715 - F1 atual: 0.9407142857142857
F1 anterior: 0.9464285714285715 - F1 atual: 0.9464285714285715
igual: 0
Possível não-duplicata
duplicata    1.000000
title        1.000000
artist       0.777778
track01      0.642857
track02      0.666667
track03      0.791667
track10      0.833333
track11      0.705882
Name: (7460, 8578), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artis

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.869565
track02      0.875000
track03      0.866667
track10      0.888889
track11      0.900000
Name: (3670, 7515), dtype: float64
cont < 20: True - continua: True
orcamento: 78 - jan_inic_dup: 33 - jan_fin_ndup: 36
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.714286
track02      0.304348
track03      0.533333
track10      0.833333
track11      0.529412
Name: (100446, 101502), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.955556
artist       0.694444
track01      1.000000
track02      1.000000
track03      0.968750
track10      1.000000
track11      0.950000
Name: (3096, 3123), dtype: float64
cont < 20: True - continua: True
orcamento: 76 - jan_inic_dup: 36 - jan_fin_ndup: 39
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title  

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      0.882353
track10      1.000000
track11      1.000000
Name: (2406, 7291), dtype: float64
cont < 20: True - continua: True
orcamento: 68 - jan_inic_dup: 48 - jan_fin_ndup: 51
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.300000
track02      0.466667
track03      0.500000
track10      0.818182
track11      0.421053
Name: (100717, 105098), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.980952
artist       1.000000
track01      0.937500
track02      0.952381
track03      0.720000
track10      0.677419
track11      0.941176
Name: (7980, 9790), dtype: float64
cont < 20: True - continua: True
orcamento: 66 - jan_inic_dup: 51 - jan_fin_ndup: 54
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
art

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.384615
track02      0.391304
track03      0.666667
track10      0.578947
track11      0.823529
Name: (100446, 104396), dtype: float64
Possível duplicata
duplicata    0.000000
title        0.845238
artist       1.000000
track01      0.849057
track02      0.851064
track03      0.683333
track10      0.418182
track11      0.519231
Name: (109296, 109299), dtype: float64
f1_atual < f1_anterior
F1 anterior: 1.0 - F1 atual: 0.9492063492063492
F1 anterior: 1.0 - F1 atual: 1.0
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.333333
track02      0.600000
track03      0.800000
track10      0.777778
track11      0.812500
Name: (102670, 2004), dtype: float64
Possível duplicata
duplicata    1.00
title        0.52
artist       1.00
track01      1.00
track02      1.0

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.807692
track02      0.434783
track03      0.700000
track10      0.538462
track11      0.687500
Name: (104117, 3710), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       0.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (1858, 9637), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.300000
track02      0.466667
track03      0.500000
track10      0.818182
track11      0.421053
Name: (100717, 105098), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657)

F1 anterior: 0.9234920634920635 - F1 atual: 0.9044444444444444
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.555556
track02      0.428571
track03      0.500000
track10      0.700000
track11      0.642857
Name: (101021, 103424), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.436111
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.920000
track11      1.000000
Name: (10312, 9807), dtype: float64
F1 anterior: 0.9044444444444444 - F1 atual: 0.8615873015873017
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.434783
track02      0.700000
track03      0.600000
track10      0.647059
track11      0.714286
Name: (104310, 109749), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.952381
artist       1.000000
track01      0.812500
track02      0.850000
track03      0.82352

  res = shell.run_cell(code, store_history=store_history, silent=silent)
  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        0.800000
artist       1.000000
track01      1.000000
track02      0.117647
track03      0.227273
track10      0.081633
track11      0.060606
Name: (104789, 105642), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       0.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (2187, 2418), dtype: float64
cont < 20: True - continua: True
orcamento: 96 - jan_inic_dup: 6 - jan_fin_ndup: 9
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.00
title        0.65
artist       1.00
track01      1.00
track02      0.35
track03      0.35
track10      0.30
track11      0.30
Name: (101515, 101521), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.933333
track02      1.000000
track03      1.000000
track10      0.900000
track11      1.000000
Name: (10240, 7367), dtype: float64
cont < 20: True - continua: True
o

Possível duplicata
duplicata    1.000000
title        0.900000
artist       1.000000
track01      1.000000
track02      0.909091
track03      0.600000
track10      0.875000
track11      0.958333
Name: (4586, 7633), dtype: float64
cont < 20: True - continua: True
orcamento: 86 - jan_inic_dup: 21 - jan_fin_ndup: 24
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       0.000000
track01      0.440000
track02      0.818182
track03      0.318182
track10      0.750000
track11      0.272727
Name: (104289, 107734), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.933333
artist       1.000000
track01      0.942857
track02      1.000000
track03      0.916667
track10      0.952381
track11      0.913043
Name: (6802, 8890), dtype: float64
cont < 20: True - continua: True
orcamento: 84 - jan_inic_dup: 24 - jan_fin_ndup: 27
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title  

Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.884615
track02      0.727273
track03      0.833333
track10      0.750000
track11      0.769231
Name: (4441, 8151), dtype: float64
f1_atual < f1_anterior
F1 anterior: 1.0 - F1 atual: 0.9571428571428572
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.458333
track02      0.909091
track03      0.250000
track10      0.875000
track11      0.562500
Name: (105076, 2004), dtype: float64
Possível duplicata
duplicata    0.000000
title        0.845238
artist       1.000000
track01      0.849057
track02      0.851064
track03      0.683333
track10      0.418182
track11      0.519231
Name: (109296, 109299), dtype: float64
F1 anterior: 1.0 - F1 atual: 0.9077777777777778
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.400000
track02      0.5000

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        0.933333
artist       1.000000
track01      0.952381
track02      1.000000
track03      1.000000
track10      0.944444
track11      1.000000
Name: (2813, 3155), dtype: float64
cont < 20: True - continua: True
orcamento: 78 - jan_inic_dup: 33 - jan_fin_ndup: 36
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.269231
track02      0.677419
track03      0.684211
track10      0.384615
track11      0.523810
Name: (104117, 106511), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.869565
track02      0.875000
track03      0.866667
track10      0.888889
track11      0.900000
Name: (3670, 7515), dtype: float64
cont < 20: True - continua: True
orcamento: 76 - jan_inic_dup: 36 - jan_fin_ndup: 39
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.533333
art

Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       0.000000
track01      0.500000
track02      0.678571
track03      0.583333
track10      0.588235
track11      0.333333
Name: (101555, 104099), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.950000
artist       0.777778
track01      0.937500
track02      0.947368
track03      1.000000
track10      1.000000
track11      1.000000
Name: (10314, 2427), dtype: float64
f1_atual < f1_anterior
F1 anterior: 1.0 - F1 atual: 0.9666666666666667
O f1 passou a piorar
F1 anterior: 1.0 - F1 atual: 1.0
##################################################################
Analisando o arquivo: diverg(10)142_NEW.csv
##################################################################
cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.512222
artist       1.000000
track01      0.

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        0.900000
artist       1.000000
track01      0.535714
track02      0.195122
track03      0.051282
track10      0.205882
track11      0.440000
Name: (109040, 109732), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.777778
track02      0.906250
track03      0.769231
track10      0.884615
track11      0.863636
Name: (1884, 9542), dtype: float64
cont < 20: True - continua: True
orcamento: 70 - jan_inic_dup: 45 - jan_fin_ndup: 48
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.714286
artist       1.000000
track01      0.206897
track02      0.133333
track03      0.184211
track10      0.179487
track11      0.175000
Name: (102390, 109139), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      0.882353
track10      1.000000
track11      1.000

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.714286
track02      0.304348
track03      0.533333
track10      0.833333
track11      0.529412
Name: (100446, 101502), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.869565
track02      0.875000
track03      0.866667
track10      0.888889
track11      0.900000
Name: (3670, 7515), dtype: float64
cont < 20: True - continua: True
orcamento: 76 - jan_inic_dup: 36 - jan_fin_ndup: 39
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.285714
track02      0.357143
track03      0.833333
track10      0.388889
track11      0.384615
Name: (100558, 109651), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       0.777778
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (9333, 9450), dtype: float64
cont < 20: True - continua: True
orcamento: 68 - jan_inic_dup: 48 - jan_fin_ndup: 51
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.833333
track02      0.260870
track03      0.666667
track10      0.846154
track11      0.523810
Name: (102831, 104117), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.980952
artist       1.000000
track01      0.937500
track02      0.952381
track03      0.720000
track10      0.677419
track11      0.941176
Name: (7980, 9790), dtype: float64
cont < 20: True - continua: True
orcamento: 66 - jan_inic_dup: 51 - jan_fin_ndup: 54
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
tra

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.833333
track02      0.260870
track03      0.666667
track10      0.846154
track11      0.523810
Name: (102831, 104117), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.884615
track02      0.727273
track03      0.833333
track10      0.750000
track11      0.769231
Name: (4441, 8151), dtype: float64
cont < 20: True - continua: True
orcamento: 66 - jan_inic_dup: 51 - jan_fin_ndup: 54
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.333333
track02      0.346154
track03      0.363636
track10      0.612903
track11      0.363636
Name: (101021, 105066), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.966667
artist       1.000000
track01      0.800000
track02      0.928571
track03      0.916667
track10      0.714

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        0.600000
artist       0.944444
track01      0.225806
track02      0.173913
track03      0.000000
track10      0.173913
track11      0.058824
Name: (104512, 109129), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (4291, 4905), dtype: float64
cont < 20: True - continua: True
orcamento: 86 - jan_inic_dup: 21 - jan_fin_ndup: 24
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.428571
track02      0.416667
track03      0.647059
track10      0.583333
track11      1.000000
Name: (104872, 109651), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      0.933333
track03      1.000000
track10      1.000000
track11      1.000000
Name: (8368, 9076), dtype: float64
c

igual: 2
F1 anterior: 0.9857142857142857 - F1 atual: 0.9857142857142857
igual: 2
Possível não-duplicata
duplicata    0.000000
title        0.700000
artist       1.000000
track01      0.269231
track02      0.206897
track03      0.107143
track10      0.181818
track11      0.135135
Name: (101931, 105268), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (2252, 4733), dtype: float64
igual: 3
F1 anterior: 0.9857142857142857 - F1 atual: 0.9857142857142857
igual: 3
Possível não-duplicata
duplicata    0.000000
title        0.746667
artist       1.000000
track01      0.230769
track02      0.166667
track03      0.107143
track10      0.222222
track11      0.125000
Name: (101243, 101298), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381

  res = shell.run_cell(code, store_history=store_history, silent=silent)



cont < 20: True - continua: True
orcamento: 68 - jan_inic_dup: 48 - jan_fin_ndup: 51
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.461538
track02      0.608696
track03      0.642857
track10      0.705882
track11      0.785714
Name: (103056, 104117), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.666667
artist       1.000000
track01      0.933333
track02      1.000000
track03      0.947368
track10      1.000000
track11      0.947368
Name: (8370, 9876), dtype: float64
cont < 20: True - continua: True
orcamento: 66 - jan_inic_dup: 51 - jan_fin_ndup: 54
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.766667
track02      0.666667
track03      0.600000
track10      0.636364
track11      0.842105
Name: (100717, 3710), dtype: float64
Possível duplicata
duplicata    1.0
title    

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.000000
title        0.666667
artist       1.000000
track01      0.933333
track02      1.000000
track03      0.947368
track10      1.000000
track11      0.947368
Name: (8370, 9876), dtype: float64
cont < 20: True - continua: True
orcamento: 62 - jan_inic_dup: 57 - jan_fin_ndup: 60
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.312500
track02      0.434783
track03      0.466667
track10      0.600000
track11      0.611111
Name: (100446, 107779), dtype: float64
Possível duplicata
duplicata    1.00000
title        1.00000
artist       1.00000
track01      1.00000
track02      0.52381
track03      1.00000
track10      1.00000
track11      1.00000
Name: (1838, 7934), dtype: float64
Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.384615
tr

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        0.637143
artist       1.000000
track01      0.250000
track02      0.133333
track03      0.205882
track10      0.187500
track11      0.176471
Name: (100655, 107814), dtype: float64
Possível duplicata
duplicata    1.0000
title        1.0000
artist       1.0000
track01      1.0000
track02      0.9375
track03      1.0000
track10      1.0000
track11      1.0000
Name: (3539, 4190), dtype: float64
F1 anterior: 0.9388888888888889 - F1 atual: 0.9188888888888889
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       1.000000
track01      0.224138
track02      0.194444
track03      0.228571
track10      0.173077
track11      0.138889
Name: (106868, 108869), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381
track11      1.000000
Name: (2026, 4601), dtype: float64
Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.555556
track02      0.529412
track03      1.000000
track10      0.548387
track11      0.315789
Name: (4676, 8097), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.566667
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (2890, 2891), dtype: float64
F1 anterior: 0.9088888888888889 - F1 atual: 0.9203174603174603
igual: 0
Possível não-duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      0.0
Nam

  res = shell.run_cell(code, store_history=store_history, silent=silent)


##################################################################
Analisando o arquivo: diverg(10)150_NEW.csv
##################################################################
cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.500000
track02      0.454545
track03      0.818182
track10      0.000000
track11      0.000000
Name: (105349, 105518), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.416

igual: 1
F1 anterior: 0.9371428571428572 - F1 atual: 0.9371428571428572
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.346154
track02      0.521739
track03      0.764706
track10      0.769231
track11      0.454545
Name: (104117, 104872), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.420000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      0.947368
Name: (11531, 3134), dtype: float64
F1 anterior: 0.9371428571428572 - F1 atual: 0.9434920634920635
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.666667
track02      0.285714
track03      0.555556
track10      0.259259
track11      0.250000
Name: (103056, 103830), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.436111
artist       1.000000
track01      1.000000
track02      1.000000
track03    

  res = shell.run_cell(code, store_history=store_history, silent=silent)
  res = shell.run_cell(code, store_history=store_history, silent=silent)


cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.350000
artist       1.000000
track01      0.642857
track02      0.440000
track03      0.153846
track10      0.739130
track11      0.473684
Name: (102349, 103832), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.800000
artist       1.000000
track01      1.000000
track02      0.117647
track03      0.227273
track10      0.081633
track11      0.060606
Name: (104789, 105642), dtype: float64
Possível duplicata
duplicata    1.0
title      

Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.777778
track02      0.727273
track03      0.875000
track10      0.833333
track11      0.333333
Name: (102477, 103398), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.777778
track02      0.906250
track03      0.769231
track10      0.884615
track11      0.863636
Name: (1884, 9542), dtype: float64
cont < 20: True - continua: True
orcamento: 72 - jan_inic_dup: 42 - jan_fin_ndup: 45
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.818182
track02      0.636364
track03      0.312500
track10      0.750000
track11      0.600000
Name: (103398, 105076), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.980952
artist       1.000000
track01      0.937500
track02      0.952381
track03      0.720000
track10      0.677

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.230769
track02      0.608696
track03      0.642857
track10      0.722222
track11      0.846154
Name: (100558, 104117), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.500000
track02      0.285714
track03      0.750000
track10      0.612903
track11      0.200000
Name: (4676, 5746), dtype: float64
F1 anterior: 0.8838095238095238 - F1 atual: 0.8838095238095238
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.222222
track02      0.642857
track03      0.800000
track10      0.750000
track11      0.692308
Name: (100558, 101021), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.879167
artist       1.000000
track01      0.750000
track02      0.750000
track03      0.735294

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        1.000000
artist       0.777778
track01      0.642857
track02      0.666667
track03      0.791667
track10      0.833333
track11      0.705882
Name: (7460, 8578), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7192, 7778), dtype: float64
Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       1.000000
track01      0.000000
track02      0.041667
track03      0.050000
track10      0.607143
track11      0.338710
Name: (104009, 109714), dtype: float64
Possível duplicata
duplicata    1.00000
title        1.00000
artist       1.00000
track01      1.00000
track02      0.52381
track03      1.00000
track10      1.00000
track11      1.00000
Name: (1838, 7934), dtype: float64
F1 anterior: 0.9296031746031745 - F1 atual: 0.9101587301587302
igu

  res = shell.run_cell(code, store_history=store_history, silent=silent)


cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.807692
track02      0.434783
track03      0.700000
track10      0.538462
track11      0.687500
Name: (104117, 3710), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7008, 8831), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.233333
track02      0.483871
track03      0.578947
track10      0.454545
track11      0.714286
Name: (100717, 106511), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
trac

igual: 1
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.454545
track02      0.714286
track03      0.428571
track10      0.550000
track11      0.647059
Name: (100187, 109749), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7134, 9695), dtype: float64
igual: 2
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.703704
track02      0.375000
track03      0.347826
track10      0.750000
track11      0.727273
Name: (101021, 104842), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.960000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      0.882353
Name: (2188, 8063), dtype: float64
ig

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7008, 8831), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.300000
track02      0.466667
track03      0.500000
track10      0.818182
track11      0.421053
Name: (100717, 105098), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 96 - jan_inic_dup: 6 - jan_fin_ndup: 9
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01

igual: 1
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.555556
track02      0.428571
track03      0.500000
track10      0.700000
track11      0.642857
Name: (101021, 103424), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7134, 9695), dtype: float64
igual: 2
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.434783
track02      0.700000
track03      0.600000
track10      0.647059
track11      0.714286
Name: (104310, 109749), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (4441, 7769), dtype: float64
ig

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.0
title        1.0
artist       0.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (1858, 9637), dtype: float64
cont < 20: True - continua: True
orcamento: 96 - jan_inic_dup: 6 - jan_fin_ndup: 9
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.461538
track02      0.409091
track03      0.833333
track10      0.263158
track11      0.687500
Name: (101480, 104396), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.933333
track02      1.000000
track03      1.000000
track10      0.900000
track11      1.000000
Name: (10240, 7367), dtype: float64
cont < 20: True - continua: True
orcamento: 94 - jan_inic_dup: 9 - jan_fin_ndup: 12
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.769231
t

igual: 1
F1 anterior: 1.0 - F1 atual: 1.0
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.250000
track02      0.714286
track03      0.500000
track10      0.388889
track11      0.812500
Name: (100558, 2004), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.644444
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (4441, 7769), dtype: float64
igual: 2
F1 anterior: 1.0 - F1 atual: 1.0
igual: 2
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.304348
track02      0.714286
track03      0.625000
track10      0.294118
track11      0.238095
Name: (102477, 104310), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.952381
track11      1.000000

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        0.900000
artist       1.000000
track01      1.000000
track02      0.909091
track03      0.600000
track10      0.875000
track11      0.958333
Name: (4586, 7633), dtype: float64
cont < 20: True - continua: True
orcamento: 84 - jan_inic_dup: 24 - jan_fin_ndup: 27
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.807692
track02      0.434783
track03      0.700000
track10      0.538462
track11      0.687500
Name: (104117, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.933333
artist       1.000000
track01      0.942857
track02      1.000000
track03      0.916667
track10      0.952381
track11      0.913043
Name: (8890, 9063), dtype: float64
cont < 20: True - continua: True
orcamento: 82 - jan_inic_dup: 27 - jan_fin_ndup: 30
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artis

igual: 1
F1 anterior: 0.9346031746031745 - F1 atual: 0.9346031746031745
igual: 1
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.458333
track02      0.909091
track03      0.250000
track10      0.875000
track11      0.562500
Name: (105076, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.922222
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.933333
track11      0.709677
Name: (4108, 7977), dtype: float64
F1 anterior: 0.9346031746031745 - F1 atual: 0.9571428571428571
igual: 0
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.400000
track02      0.500000
track03      0.578947
track10      0.642857
track11      0.846154
Name: (102670, 103997), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.966667
artist       1.000000
track01      0.800000
track02      0.928571
track03      0

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        1.000000
artist       0.777778
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (10314, 8640), dtype: float64
cont < 20: True - continua: True
orcamento: 74 - jan_inic_dup: 39 - jan_fin_ndup: 42
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.709524
artist       0.000000
track01      0.600000
track02      0.653846
track03      0.272727
track10      0.290323
track11      0.217391
Name: (105066, 109426), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.777778
track02      0.906250
track03      0.769231
track10      0.884615
track11      0.863636
Name: (1884, 9542), dtype: float64
cont < 20: True - continua: True
orcamento: 72 - jan_inic_dup: 42 - jan_fin_ndup: 45
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.790476
ar

  res = shell.run_cell(code, store_history=store_history, silent=silent)



cont < 20: True - continua: True
orcamento: 64 - jan_inic_dup: 54 - jan_fin_ndup: 57
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.851852
track02      0.818182
track03      0.363636
track10      0.650000
track11      0.409091
Name: (101021, 104289), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.666667
artist       1.000000
track01      0.933333
track02      1.000000
track03      0.947368
track10      1.000000
track11      0.947368
Name: (8370, 9876), dtype: float64
cont < 20: True - continua: True
orcamento: 62 - jan_inic_dup: 57 - jan_fin_ndup: 60
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.333333
track02      0.600000
track03      0.800000
track10      0.777778
track11      0.812500
Name: (102670, 2004), dtype: float64
Possível duplicata
duplicata    1.0
title    

  res = shell.run_cell(code, store_history=store_history, silent=silent)


##################################################################
Analisando o arquivo: diverg(10)160_NEW.csv
##################################################################
cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.666667
track02      0.500000
track03      0.600000
track10      0.500000
track11      0.615385
Name: (100558, 105098), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       0.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (2187, 2418), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.304348
track02      0.571429
track03      0

Início
orcamento: 60
Quantidade de itens no conjunto de treinamento: 40
Possível não-duplicata
duplicata    0.000000
title        0.637143
artist       1.000000
track01      0.250000
track02      0.133333
track03      0.205882
track10      0.187500
track11      0.176471
Name: (100655, 107814), dtype: float64
Possível duplicata
duplicata    1.0000
title        1.0000
artist       1.0000
track01      1.0000
track02      0.9375
track03      1.0000
track10      1.0000
track11      1.0000
Name: (3539, 4190), dtype: float64
F1 anterior: 0.9142857142857144 - F1 atual: 0.9466666666666667
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       1.000000
track01      0.224138
track02      0.194444
track03      0.228571
track10      0.173077
track11      0.138889
Name: (106868, 108869), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (2252

  res = shell.run_cell(code, store_history=store_history, silent=silent)


cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.666667
track02      0.500000
track03      0.600000
track10      0.500000
track11      0.615385
Name: (100558, 105098), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.148148
track11      0.333333
Name: (4384, 5657), dtype: float64
cont < 20: True - continua: True
orcamento: 98 - jan_inic_dup: 3 - jan_fin_ndup: 6
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.304348
track02      0.571429
track03      0.555556
track10      0.588235
track11      0.761905
Name: (104310, 107056), dtype: float64
Possível duplicata
duplicata    1.0
title      

f1_atual < f1_anterior
F1 anterior: 0.9857142857142858 - F1 atual: 0.9634920634920635
F1 anterior: 0.9857142857142858 - F1 atual: 0.9857142857142858
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       1.000000
track01      0.224138
track02      0.194444
track03      0.228571
track10      0.173077
track11      0.138889
Name: (106868, 108869), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       1.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (7192, 7778), dtype: float64
F1 anterior: 0.9857142857142858 - F1 atual: 0.9888888888888889
igual: 0
Possível não-duplicata
duplicata    0.000000
title        0.660000
artist       1.000000
track01      0.235294
track02      0.170213
track03      0.172414
track10      0.142857
track11      0.171429
Name: (104734, 105358), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.600000
artist       1.000000
track01      1.0000

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        0.955556
artist       0.694444
track01      1.000000
track02      1.000000
track03      0.968750
track10      1.000000
track11      0.950000
Name: (3096, 3123), dtype: float64
cont < 20: True - continua: True
orcamento: 76 - jan_inic_dup: 36 - jan_fin_ndup: 39
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.285714
track02      0.357143
track03      0.833333
track10      0.388889
track11      0.384615
Name: (100558, 109651), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       0.777778
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (10314, 8640), dtype: float64
cont < 20: True - continua: True
orcamento: 74 - jan_inic_dup: 39 - jan_fin_ndup: 42
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
ar

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.703704
track02      0.750000
track03      0.727273
track10      0.700000
track11      0.818182
Name: (101021, 103997), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.777778
track02      0.906250
track03      0.769231
track10      0.884615
track11      0.863636
Name: (1884, 9542), dtype: float64
cont < 20: True - continua: True
orcamento: 72 - jan_inic_dup: 42 - jan_fin_ndup: 45
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.240000
track02      0.714286
track03      0.454545
track10      0.777778
track11      0.590909
Name: (100558, 104289), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.980952
artist       1.000000
track01      0.937500
track02      0.952381
track03      0.720000
track10      0.677419
track11      0.941

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.869565
track02      0.875000
track03      0.866667
track10      0.888889
track11      0.900000
Name: (3670, 7515), dtype: float64
cont < 20: True - continua: True
orcamento: 76 - jan_inic_dup: 36 - jan_fin_ndup: 39
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.285714
track02      0.357143
track03      0.833333
track10      0.388889
track11      0.384615
Name: (100558, 109651), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       0.777778
track01      1.000000
track02      1.000000
track03      1.000000
track10      1.000000
track11      1.000000
Name: (10314, 8640), dtype: float64
cont < 20: True - continua: True
orcamento: 74 - jan_inic_dup: 39 - jan_fin_ndup: 42
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
ar

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.461538
track02      0.608696
track03      0.642857
track10      0.705882
track11      0.785714
Name: (103056, 104117), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.666667
artist       1.000000
track01      0.933333
track02      1.000000
track03      0.947368
track10      1.000000
track11      0.947368
Name: (8370, 9876), dtype: float64
cont < 20: True - continua: True
orcamento: 66 - jan_inic_dup: 51 - jan_fin_ndup: 54
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.766667
track02      0.666667
track03      0.600000
track10      0.636364
track11      0.842105
Name: (100717, 3710), dtype: float64
Possível duplicata
duplicata    1.00000
title        1.00000
artist       1.00000
track01      1.00000
track02      0.52381
track03      1.00000
track10      1.00000
track11      1.00000
Name: 

  res = shell.run_cell(code, store_history=store_history, silent=silent)


Possível duplicata
duplicata    1.000000
title        0.750000
artist       1.000000
track01      0.777778
track02      1.000000
track03      1.000000
track10      1.000000
track11      0.176471
Name: (7653, 8029), dtype: float64
cont < 20: True - continua: True
orcamento: 64 - jan_inic_dup: 54 - jan_fin_ndup: 57
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.312500
track02      0.434783
track03      0.466667
track10      0.600000
track11      0.611111
Name: (100446, 107779), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.433333
artist       1.000000
track01      1.000000
track02      1.000000
track03      0.823529
track10      1.000000
track11      1.000000
Name: (5204, 7514), dtype: float64
cont < 20: True - continua: True
orcamento: 62 - jan_inic_dup: 57 - jan_fin_ndup: 60
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title  

  res = shell.run_cell(code, store_history=store_history, silent=silent)



duplicata    0.000000
title        1.000000
artist       0.000000
track01      0.777778
track02      0.900000
track03      0.400000
track10      0.350000
track11      0.562500
Name: (101021, 3710), dtype: float64
Possível duplicata
duplicata    1.000000
title        1.000000
artist       1.000000
track01      0.777778
track02      0.906250
track03      0.769231
track10      0.884615
track11      0.863636
Name: (1884, 9542), dtype: float64
cont < 20: True - continua: True
orcamento: 90 - jan_inic_dup: 15 - jan_fin_ndup: 18
reexecuta: True - deslz_ndup >= 1: True
Possível não-duplicata
duplicata    0.000000
title        0.666667
artist       0.000000
track01      0.300000
track02      0.466667
track03      0.529412
track10      0.722222
track11      0.684211
Name: (100521, 100558), dtype: float64
Possível duplicata
duplicata    1.000000
title        0.666667
artist       1.000000
track01      0.933333
track02      1.000000
track03      0.947368
track10      1.000000
track11      0.94736

cont < 20: True - continua: True
orcamento: 100 - jan_inic_dup: 0 - jan_fin_ndup: 3
reexecuta: True - deslz_ndup >= 1: False
Possível não-duplicata
duplicata    1.000000
title        0.436111
artist       1.000000
track01      1.000000
track02      1.000000
track03      1.000000
track10      0.920000
track11      1.000000
Name: (10312, 9807), dtype: float64
Possível duplicata
duplicata    1.0
title        1.0
artist       0.0
track01      1.0
track02      1.0
track03      1.0
track10      1.0
track11      1.0
Name: (2187, 2418), dtype: float64
ERRO NA VALIDAÇÃO CRUZADA!
Conjunto de treinamento:
              duplicata  title  artist  track01  track02  track03  track10  \
(2187, 2418)        1.0    1.0     0.0      1.0      1.0      1.0      1.0   

              track11  
(2187, 2418)      1.0  


  res = shell.run_cell(code, store_history=store_history, silent=silent)


UnboundLocalError: local variable 'cv_results' referenced before assignment