## **Obtencion y preparacion**

In [658]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#%matplotlib qt
df = pd.read_csv("data/data_uci.csv", sep=";")

#Interpolacion de valores 0 para Frecuencia cardiaca maxima
df['fcm'].replace(to_replace=0, value=np.nan, inplace=True)
df['fcm'].interpolate(method='linear', inplace=True)

#Interpolacion de valores 0 para presion arterial en reposo
df['par'].replace(to_replace=0, value=np.nan, inplace=True)
df['par'].replace(to_replace=-1, value=np.nan, inplace=True)
df['par'].interpolate(method='linear', inplace=True)

#Modificamos el dataset para trabajar los datos categoricos
df['riesgo'] = df['riesgo'].map({'bajo': 0, 'alto': 1})
#Separamos las variables objetivo y entrada.
X = df[['edad', 'td', 'par', 'col', 'fcm']]
Y = df['riesgo']

#Aplicamos el oversampling visto en clases de práctica:
from imblearn.over_sampling import RandomOverSampler

oversample = RandomOverSampler(sampling_strategy='minority')
X_over, Y_over = oversample.fit_resample(X, Y)

# Normalizacion
def mean_norm(df_input):
    return df_input.apply(lambda x: (x - x.mean()) / x.std(), axis=0)

def minmax_norm(df_input):
    return (df_input - df_input.min()) / (df_input.max() - df_input.min())

X_over_normalized = minmax_norm(X_over)

## **Modelo MLP**

In [730]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [731]:
x_train, x_test, y_train, y_test = train_test_split(X_over_normalized,Y_over,train_size=0.7,test_size=0.3)

In [744]:
classifier_model = MLPClassifier(max_iter=50000)

In [745]:
classifier_model.fit(x_train, y_train)

In [746]:
pred = classifier_model.predict(x_test)
pred_df = pd.DataFrame(pred)

In [747]:
classifier_model.score(x_test, y_test)

0.8080808080808081

In [748]:
pred_df = pred_df.apply(lambda x: x.apply(lambda s: 1 if (s >= 0.50) else 0))

In [749]:
f1_score(y_test,pred_df)

0.8

## **Modelo naive bayes - Gaussian**

In [720]:
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(x_train,y_train)

In [721]:
clf.score(x_test,y_test)

0.7373737373737373

In [724]:
y_predicted = clf.predict(x_test)

In [725]:
f1_score(y_test,y_predicted)

0.7547169811320756