In [1]:
import numpy as np
import pandas as pd
import matplotlib.pylab as pt
from sklearn import metrics
import seaborn as sns
import random
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn import svm
%matplotlib inline

## Funciones auxiliares

In [2]:
def split_positive_negative(dataframe):
    """Partition a dataset by its binary ``Dx:Cancer`` label.

    Args:
        dataframe: DataFrame containing a ``Dx:Cancer`` column.

    Returns:
        Tuple ``(positive_df, negative_df)``: the rows whose ``Dx:Cancer``
        equals 1 and the rows whose ``Dx:Cancer`` equals 0, in that order.
        Rows holding any other value end up in neither frame.
    """
    labels = dataframe['Dx:Cancer']
    is_positive = labels == 1
    is_negative = labels == 0
    return dataframe[is_positive], dataframe[is_negative]

def random_sample(dataframe, num_samples, replacement=False):
    """Draw ``num_samples`` random rows from ``dataframe``.

    Row positions are drawn with the standard-library ``random`` module, so
    the result depends on that module's global state.

    Args:
        dataframe: DataFrame to sample rows from.
        num_samples: Number of rows to draw.
        replacement: When equal to ``True`` rows may repeat
            (``random.choices``); otherwise every row is drawn at most once
            (``random.sample``, which raises ``ValueError`` if
            ``num_samples`` exceeds the number of rows).

    Returns:
        DataFrame with the selected rows, in the order they were drawn.
    """
    row_positions = list(range(len(dataframe)))
    # Equality test (not truthiness) on purpose: only True-equal values enable replacement.
    draw = random.choices if replacement == True else random.sample
    chosen_positions = draw(row_positions, k=num_samples)
    return dataframe.iloc[chosen_positions]

def split_xy(dataframe):
    """Separate a dataset into a feature matrix and a label vector.

    Args:
        dataframe: DataFrame containing a ``Dx:Cancer`` column; it is not
            modified.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays: ``x`` holds every column except
        ``Dx:Cancer`` and ``y`` holds the ``Dx:Cancer`` column.
    """
    features = dataframe.drop(columns="Dx:Cancer")
    target = dataframe["Dx:Cancer"]
    return features.to_numpy(), target.to_numpy()

In [3]:
def analysis(algorithm, dataframe, num_samples, metric):
    """Train on a class-balanced random subsample and score on the full dataset.

    ``num_samples`` positive rows and ``num_samples`` negative rows are drawn
    without replacement, the chosen classifier is fitted on them, and the
    requested metric is computed on every row of ``dataframe``.

    Args:
        algorithm: One of ``'NaiveBayes'``, ``'DecisionTree'``,
            ``'RandomForest'`` or ``'SVM'``.
        dataframe: Dataset containing the ``Dx:Cancer`` label column.
        num_samples: Number of rows drawn from each class for training.
        metric: One of ``'f1-score'``, ``'classification_report'``,
            ``'confusion_matrix'``, ``'auc_score'`` or ``'roc_curve'``.

    Returns:
        The metric value: a float for ``'f1-score'`` and ``'auc_score'``, a
        dict for ``'classification_report'``, an array for
        ``'confusion_matrix'``. ``'roc_curve'`` shows the plot and returns
        ``None``. ``'auc_score'`` returns ``0`` (and emits a warning) when
        the AUC computation raises ``ValueError``.

    Raises:
        ValueError: If ``algorithm`` or ``metric`` is not a supported name.
    """
    import warnings  # local import: only used for the AUC fallback warning

    model_factories = {
        'NaiveBayes': lambda: GaussianNB(),
        'DecisionTree': lambda: DecisionTreeClassifier(),
        # 'sqrt' is what 'auto' meant for classifiers; 'auto' itself is
        # rejected by recent scikit-learn releases.
        'RandomForest': lambda: RandomForestClassifier(n_estimators=100, max_features='sqrt'),
        'SVM': lambda: svm.SVC(probability=True),
    }
    supported_metrics = ('f1-score', 'classification_report', 'confusion_matrix',
                         'auc_score', 'roc_curve')
    # Fail fast with a clear message instead of an UnboundLocalError on `model`
    # or a silently echoed metric name.
    if algorithm not in model_factories:
        raise ValueError("Unknown algorithm %r; expected one of %s"
                         % (algorithm, sorted(model_factories)))
    if metric not in supported_metrics:
        raise ValueError("Unknown metric %r; expected one of %s"
                         % (metric, list(supported_metrics)))

    # Build the balanced training set: positives are sampled first, then negatives.
    positive_df, negative_df = split_positive_negative(dataframe)
    train_df = pd.concat([
        random_sample(positive_df, num_samples),
        random_sample(negative_df, num_samples),
    ])
    x_train, y_train = split_xy(train_df)

    model = model_factories[algorithm]()
    model.fit(x_train, y_train)

    # NOTE(review): the evaluation set is the whole dataframe, so it also
    # contains the rows used for training; scores are therefore optimistic.
    x_test, y_test = split_xy(dataframe)

    if metric == 'auc_score':
        try:
            return metrics.roc_auc_score(y_test, model.predict_proba(x_test)[:, 1])
        except ValueError as error:
            # Keep the historical fallback value but make the failure visible.
            warnings.warn("AUC could not be computed (%s); returning 0" % error)
            return 0

    if metric == 'roc_curve':
        # plot_roc_curve no longer exists in recent scikit-learn releases;
        # prefer its replacement and fall back for older installations.
        roc_display = getattr(metrics, 'RocCurveDisplay', None)
        if hasattr(roc_display, 'from_estimator'):
            roc_display.from_estimator(model, x_test, y_test)
        else:
            metrics.plot_roc_curve(model, x_test, y_test)
        pt.show()
        return None

    y_pred = model.predict(x_test)
    if metric == 'f1-score':
        return metrics.f1_score(y_test, y_pred)
    if metric == 'classification_report':
        return metrics.classification_report(y_test, y_pred, output_dict=True)
    return metrics.confusion_matrix(y_test, y_pred)

def evaluar_todo_el_set_de_datos(algorithm, dataframe, validation_split, metric):
    """Train and score a classifier on one stratified train/validation split.

    The whole ``dataframe`` is split once with ``StratifiedShuffleSplit``;
    the classifier is fitted on the training part and the requested metric
    is computed on the validation part.

    Args:
        algorithm: One of ``'NaiveBayes'``, ``'DecisionTree'``,
            ``'RandomForest'`` or ``'SVM'``.
        dataframe: Dataset containing the ``Dx:Cancer`` label column.
        validation_split: Passed as ``test_size`` to
            ``StratifiedShuffleSplit``.
        metric: One of ``'f1-score'``, ``'classification_report'``,
            ``'confusion_matrix'``, ``'auc_score'`` or ``'roc_curve'``.

    Returns:
        The metric value: a float for ``'f1-score'`` and ``'auc_score'``, a
        dict for ``'classification_report'``, an array for
        ``'confusion_matrix'``. ``'roc_curve'`` shows the plot and returns
        ``None``.

    Raises:
        ValueError: If ``algorithm`` or ``metric`` is not a supported name.
            Errors raised by the AUC computation are not caught here.
    """
    model_factories = {
        'NaiveBayes': lambda: GaussianNB(),
        'DecisionTree': lambda: DecisionTreeClassifier(),
        # 'sqrt' is what 'auto' meant for classifiers; 'auto' itself is
        # rejected by recent scikit-learn releases.
        'RandomForest': lambda: RandomForestClassifier(n_estimators=100, max_features='sqrt'),
        'SVM': lambda: svm.SVC(probability=True),
    }
    supported_metrics = ('f1-score', 'classification_report', 'confusion_matrix',
                         'auc_score', 'roc_curve')
    # Fail fast with a clear message instead of an UnboundLocalError on `model`
    # or a silently echoed metric name.
    if algorithm not in model_factories:
        raise ValueError("Unknown algorithm %r; expected one of %s"
                         % (algorithm, sorted(model_factories)))
    if metric not in supported_metrics:
        raise ValueError("Unknown metric %r; expected one of %s"
                         % (metric, list(supported_metrics)))

    x, y = split_xy(dataframe)

    # A single stratified split keeps the class ratio in both partitions.
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=validation_split)
    train_index, test_index = next(splitter.split(x, y))
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model = model_factories[algorithm]()
    model.fit(x_train, y_train)

    if metric == 'auc_score':
        return metrics.roc_auc_score(y_test, model.predict_proba(x_test)[:, 1])

    if metric == 'roc_curve':
        # plot_roc_curve no longer exists in recent scikit-learn releases;
        # prefer its replacement and fall back for older installations.
        roc_display = getattr(metrics, 'RocCurveDisplay', None)
        if hasattr(roc_display, 'from_estimator'):
            roc_display.from_estimator(model, x_test, y_test)
        else:
            metrics.plot_roc_curve(model, x_test, y_test)
        pt.show()
        return None

    y_pred = model.predict(x_test)
    if metric == 'f1-score':
        return metrics.f1_score(y_test, y_pred)
    if metric == 'classification_report':
        return metrics.classification_report(y_test, y_pred, output_dict=True)
    return metrics.confusion_matrix(y_test, y_pred)

## Media de 500 evaluaciones (18 muestras positivas vs 18 negativas)

In [4]:
# Average every metric over repeated runs of `analysis`, which trains on an
# equal number of positive and negative samples and scores on the full dataset.
df = pd.read_excel("datos.xlsx")
num_evaluaciones = 500
num_samples_per_class = 18

# (printed heading, key understood by `analysis`), in reporting order.
algorithms = [
    ("NAIVE BAYES", 'NaiveBayes'),
    ("DECISION TREE", 'DecisionTree'),
    ("RANDOM FOREST", 'RandomForest'),
    ("SUPPORT VECTOR MACHINE", 'SVM'),
]
class_labels = ('0', '1')
report_fields = ('precision', 'recall', 'f1-score')

for heading, algorithm in algorithms:
    # Accumulators are prefixed with `mean_` so they do not shadow the
    # `f1_score` function imported from sklearn.metrics.
    mean_f1 = 0.0
    mean_auc = 0.0
    mean_confusion = np.zeros((2, 2))
    mean_report = {label: {field: 0.0 for field in report_fields} for label in class_labels}

    for _ in range(num_evaluaciones):
        # NOTE(review): each call draws a new sample and fits a new model, so
        # the four metrics of one iteration come from four different models.
        mean_f1 += analysis(algorithm, df, num_samples_per_class, 'f1-score')
        mean_auc += analysis(algorithm, df, num_samples_per_class, 'auc_score')
        mean_confusion += analysis(algorithm, df, num_samples_per_class, 'confusion_matrix')
        report = analysis(algorithm, df, num_samples_per_class, 'classification_report')
        for label in class_labels:
            for field in report_fields:
                mean_report[label][field] += report[label][field]

    # Turn the accumulated sums into means.
    mean_f1 /= num_evaluaciones
    mean_auc /= num_evaluaciones
    mean_confusion /= num_evaluaciones
    for label in class_labels:
        for field in report_fields:
            mean_report[label][field] /= num_evaluaciones

    print(heading + ": ")
    print("Media de f1 score: " + str(mean_f1))
    print("Media de auc score: " + str(mean_auc))
    print("Media de confusion matrix: " + str(mean_confusion))
    print("Media de classification report: " + str(mean_report))
    print(" --------------------------------------------")

NAIVE BAYES: 
Media de f1 score: 0.14171510225157685
Media de auc score: 0.9190541155070487
Media de confusion matrix: [[439.4 293.6]
 [  0.   18. ]]
Media de classification report: {'0': {'precision': 0.9987581860937416, 'recall': 0.7092769440654842, 'f1-score': 0.7886564475496061}, '1': {'precision': 0.1126913634528031, 'recall': 0.9555555555555557, 'f1-score': 0.1978549767015424}}
 --------------------------------------------
DECISION TREE: 
Media de f1 score: 0.6709913490764554
Media de auc score: 0.9851978171896316
Media de confusion matrix: [[711.4  21.6]
 [  0.   18. ]]
Media de classification report: {'0': {'precision': 1.0, 'recall': 0.9748976807639836, 'f1-score': 0.9872508201716409}, '1': {'precision': 0.5182481940144479, 'recall': 1.0, 'f1-score': 0.6765274725274726}}
 --------------------------------------------
RANDOM FOREST: 
Media de f1 score: 0.523258648098043
Media de auc score: 0.9994959830225859
Media de confusion matrix: [[697.1  35.9]
 [  0.   18. ]]
Media de clas

## Media de 500 evaluaciones con clases balanceadas

In [5]:
# Balance the classes by appending whole copies of the positive rows, then
# average every metric over repeated stratified train/validation splits.
df_raw = pd.read_excel("datos.xlsx")
positive_df, negative_df = split_positive_negative(df_raw)
# Number of extra copies of the positive rows appended to the original data.
num_positive_copies = int(negative_df.shape[0] / positive_df.shape[0])
# Single concat instead of growing the frame inside a loop.
# NOTE(review): positives are duplicated BEFORE evaluar_todo_el_set_de_datos
# splits train/validation, so copies of the same row can land on both sides
# of the split; the scores below are likely optimistic.
df = pd.concat([df_raw] + [positive_df] * num_positive_copies)

num_evaluaciones = 500
validation_split = 0.3

# (printed heading, key understood by `evaluar_todo_el_set_de_datos`), in reporting order.
algorithms = [
    ("NAIVE BAYES", 'NaiveBayes'),
    ("DECISION TREE", 'DecisionTree'),
    ("RANDOM FOREST", 'RandomForest'),
    ("SUPPORT VECTOR MACHINE", 'SVM'),
]
class_labels = ('0', '1')
report_fields = ('precision', 'recall', 'f1-score')

for heading, algorithm in algorithms:
    # Accumulators are prefixed with `mean_` so they do not shadow the
    # `f1_score` function imported from sklearn.metrics.
    mean_f1 = 0.0
    mean_auc = 0.0
    mean_confusion = np.zeros((2, 2))
    mean_report = {label: {field: 0.0 for field in report_fields} for label in class_labels}

    for _ in range(num_evaluaciones):
        # NOTE(review): each call makes a new split and fits a new model, so
        # the four metrics of one iteration come from four different models.
        mean_f1 += evaluar_todo_el_set_de_datos(algorithm, df, validation_split, 'f1-score')
        mean_auc += evaluar_todo_el_set_de_datos(algorithm, df, validation_split, 'auc_score')
        mean_confusion += evaluar_todo_el_set_de_datos(algorithm, df, validation_split, 'confusion_matrix')
        report = evaluar_todo_el_set_de_datos(algorithm, df, validation_split, 'classification_report')
        for label in class_labels:
            for field in report_fields:
                mean_report[label][field] += report[label][field]

    # Turn the accumulated sums into means.
    mean_f1 /= num_evaluaciones
    mean_auc /= num_evaluaciones
    mean_confusion /= num_evaluaciones
    for label in class_labels:
        for field in report_fields:
            mean_report[label][field] /= num_evaluaciones

    print(heading + ": ")
    print("Media de f1 score: " + str(mean_f1))
    print("Media de auc score: " + str(mean_auc))
    print("Media de confusion matrix: " + str(mean_confusion))
    print("Media de classification report: " + str(mean_report))
    print(" --------------------------------------------")

NAIVE BAYES: 
Media de f1 score: 0.6952832268247378
Media de auc score: 0.7297727272727272
Media de confusion matrix: [[ 24.6 195.4]
 [  0.  222. ]]
Media de classification report: {'0': {'precision': 1.0, 'recall': 0.12272727272727271, 'f1-score': 0.21811627099567113}, '1': {'precision': 0.5349948906255181, 'recall': 1.0, 'f1-score': 0.6970476667116243}}
 --------------------------------------------
DECISION TREE: 
Media de f1 score: 0.9973134141209012
Media de auc score: 0.9965909090909092
Media de confusion matrix: [[218.3   1.7]
 [  0.  222. ]]
Media de classification report: {'0': {'precision': 1.0, 'recall': 0.9922727272727272, 'f1-score': 0.9961160853455608}, '1': {'precision': 0.9924204579168864, 'recall': 1.0, 'f1-score': 0.996190826324257}}
 --------------------------------------------
RANDOM FOREST: 
Media de f1 score: 0.9984279740011084
Media de auc score: 1.0
Media de confusion matrix: [[219.   1.]
 [  0. 222.]]
Media de classification report: {'0': {'precision': 1.0, 'rec

## Media de 500 evaluaciones con todo el conjunto de datos

In [6]:
# Average every metric over repeated stratified train/validation splits of the
# original (unbalanced) dataset.
df = pd.read_excel("datos.xlsx")
num_evaluaciones = 500
validation_split = 0.3

# (printed heading, key understood by `evaluar_todo_el_set_de_datos`), in reporting order.
algorithms = [
    ("NAIVE BAYES", 'NaiveBayes'),
    ("DECISION TREE", 'DecisionTree'),
    ("RANDOM FOREST", 'RandomForest'),
    ("SUPPORT VECTOR MACHINE", 'SVM'),
]
class_labels = ('0', '1')
report_fields = ('precision', 'recall', 'f1-score')

for heading, algorithm in algorithms:
    # Accumulators are prefixed with `mean_` so they do not shadow the
    # `f1_score` function imported from sklearn.metrics.
    mean_f1 = 0.0
    mean_auc = 0.0
    mean_confusion = np.zeros((2, 2))
    mean_report = {label: {field: 0.0 for field in report_fields} for label in class_labels}

    for _ in range(num_evaluaciones):
        # NOTE(review): each call makes a new split and fits a new model, so
        # the four metrics of one iteration come from four different models.
        mean_f1 += evaluar_todo_el_set_de_datos(algorithm, df, validation_split, 'f1-score')
        mean_auc += evaluar_todo_el_set_de_datos(algorithm, df, validation_split, 'auc_score')
        mean_confusion += evaluar_todo_el_set_de_datos(algorithm, df, validation_split, 'confusion_matrix')
        report = evaluar_todo_el_set_de_datos(algorithm, df, validation_split, 'classification_report')
        for label in class_labels:
            for field in report_fields:
                mean_report[label][field] += report[label][field]

    # Turn the accumulated sums into means.
    mean_f1 /= num_evaluaciones
    mean_auc /= num_evaluaciones
    mean_confusion /= num_evaluaciones
    for label in class_labels:
        for field in report_fields:
            mean_report[label][field] /= num_evaluaciones

    print(heading + ": ")
    print("Media de f1 score: " + str(mean_f1))
    print("Media de auc score: " + str(mean_auc))
    print("Media de confusion matrix: " + str(mean_confusion))
    print("Media de classification report: " + str(mean_report))
    print(" --------------------------------------------")

NAIVE BAYES: 
Media de f1 score: 0.045521091508614484
Media de auc score: 0.6847963800904978
Media de confusion matrix: [[ 54.3 166.7]
 [  0.7   4.3]]
Media de classification report: {'0': {'precision': 0.9966666666666667, 'recall': 0.12398190045248869, 'f1-score': 0.21734310202491586}, '1': {'precision': 0.024089797228478392, 'recall': 0.96, 'f1-score': 0.046996958317969226}}
 --------------------------------------------
DECISION TREE: 
Media de f1 score: 0.6441308691308691
Media de auc score: 0.8877375565610862
Media de confusion matrix: [[220.3   0.7]
 [  1.4   3.6]]
Media de classification report: {'0': {'precision': 0.991905720063756, 'recall': 0.9932126696832579, 'f1-score': 0.9925373681082466}, '1': {'precision': 0.7344047619047619, 'recall': 0.6399999999999999, 'f1-score': 0.6486113886113886}}
 --------------------------------------------
RANDOM FOREST: 
Media de f1 score: 0.6684415584415584
Media de auc score: 0.9923076923076923
Media de confusion matrix: [[220.1   0.9]
 [  1.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


SUPPORT VECTOR MACHINE: 
Media de f1 score: 0.0
Media de auc score: 0.9772850678733033
Media de confusion matrix: [[221.   0.]
 [  5.   0.]]
Media de classification report: {'0': {'precision': 0.9778761061946903, 'recall': 1.0, 'f1-score': 0.9888143176733781}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0}}
 --------------------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
