In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Procesamiento de Datos

In [None]:
# Load the dataset from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='data.csv')

In [None]:
# Print a summary of the loaded frame (columns, non-null counts, dtypes)
df.info()

In [None]:
# Preview the first five rows of the raw data
df.head(n=5)

In [None]:
# List the distinct categories present in the "Diet" column
df["Diet"].unique()

In [None]:
# Count the missing values in every column
print(df.isnull().sum())

## Transformación de Datos


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Columns kept for modelling. "Blood Pressure" holds "<systolic>/<diastolic>"
# strings and is replaced below by two numeric columns.
features = [
    "Age",
    "Sex",
    "Cholesterol",
    "Blood Pressure",
    "Heart Rate",
    "Diabetes",
    "Family History",
    "Smoking",
    "Alcohol Consumption",
    "Exercise Hours Per Week",
    "Diet",
    "Previous Heart Problems",
    "Sedentary Hours Per Day",
    "BMI",
    "Physical Activity Days Per Week",
    "Sleep Hours Per Day",
    "Heart Attack Risk",
]

# Take an explicit copy so the column assignments below operate on an
# independent frame rather than on a slice of the loaded data (this is what
# triggers pandas' SettingWithCopyWarning).
df = df[features].copy()

# Split "Blood Pressure" on "/" and store both parts as numeric columns.
pressure_parts = df["Blood Pressure"].str.split("/", expand=True)
df["Systolic Pressure"] = pd.to_numeric(pressure_parts[0])
df["Diastolic Pressure"] = pd.to_numeric(pressure_parts[1])
df = df.drop(columns=["Blood Pressure"])

# Encode "Diet" as an ordinal value. An explicit mapping is used so that an
# unexpected or missing category fails loudly instead of being silently
# encoded as 2 ("Healthy").
diet_codes = {"Unhealthy": 0, "Average": 1, "Healthy": 2}
unknown_diets = set(df["Diet"].unique()) - set(diet_codes)
if unknown_diets:
    raise ValueError(
        f"Unexpected 'Diet' values: {sorted(unknown_diets, key=str)}"
    )
df["Diet"] = df["Diet"].map(diet_codes)

# Encode "Sex" as 1 for "Male" and 0 for every other value.
df["Sex"] = (df["Sex"] == "Male").astype("int64")

# Model SOM

In [None]:
# Import the SOM implementation
from minisom import MiniSom

In [None]:
# Rescale every column of df with a min-max scaler; X is the resulting array
# and keeps the same row order as df.
scaler = MinMaxScaler()
scaler.fit(df)
X = scaler.transform(df)

In [None]:
# Build a 10x10 SOM. The input dimension is read from the scaled matrix so the
# map stays valid if the feature list changes, and a fixed random_seed makes
# the weight initialisation and the random training order reproducible.
som = MiniSom(
    x=10,
    y=10,
    input_len=X.shape[1],
    sigma=1.0,
    learning_rate=0.5,
    random_seed=0,
)

In [None]:
# Seed the SOM weights from the scaled data
som.random_weights_init(data=X)

In [None]:
# Fit the SOM on the scaled data for 1000 randomly sampled iterations
som.train_random(X, 1000)

In [None]:
# Draw the SOM distance map and overlay the winning node of every sample
from pylab import bone, pcolor, colorbar, plot, show

# Class label of each sample, aligned row-for-row with X (both come from df).
# Previously `y` was never defined, so this cell failed with a NameError.
# NOTE(review): assumes "Heart Attack Risk" is coded 0/1, matching the two
# marker/colour entries below — confirm against the data.
y = df["Heart Attack Risk"].astype(int).to_numpy()

bone()
# Transposed so the first grid index runs along the x axis, consistent with
# the (w[0], w[1]) coordinates used for the markers.
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's']
colors = ['r', 'b']
for sample, label in zip(X, y):
    w = som.winner(sample)
    # Offset by 0.5 to place the marker at the centre of the winning cell.
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[label],
         markeredgecolor=colors[label],
         markerfacecolor='None',
         markersize=10,
         markeredgewidth=2)
show()

# Model Perceptron


In [None]:
from sklearn.linear_model import Perceptron

In [None]:
# Preview the transformed frame before splitting it
df.head(n=5)

In [None]:
# Hold out part of the data for evaluation; "Heart Attack Risk" is the target
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=["Heart Attack Risk"]),
    df["Heart Attack Risk"],
    random_state=0,
)

In [None]:
# Show the shapes of the train/test feature matrices and target vectors
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Perceptron: fit on the training split and evaluate on the held-out split.
perceptron = Perceptron()
perceptron.fit(X_train, y_train)
Y_pred = perceptron.predict(X_test)
# Accuracy (in percent) is measured on the test split; scoring the training
# data, as before, overstates how well the model generalises.
# NOTE(review): the features are fed in unscaled here — consider scaling them,
# since the perceptron update is sensitive to feature magnitude.
acc_perceptron = round(perceptron.score(X_test, y_test) * 100, 2)
acc_perceptron