In [1]:
import numpy as np

def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The naive formula overflows in ``exp(-z)`` for large negative ``z``.
    This version only ever exponentiates a non-positive number, so it
    cannot overflow.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        float64 values in ``[0, 1]`` with the same shape as ``z`` (a NumPy
        scalar when ``z`` is a scalar).
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) lies in [0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    stable = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result into a scalar and leaves arrays as-is.
    return stable[()]

def compute_loss(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Both inputs are flattened before the element-wise computation so that a
    ``(m,)`` label vector paired with ``(m, 1)`` predictions (or vice versa)
    is compared element by element instead of being broadcast to ``(m, m)``.

    Args:
        y_true: Array-like of 0/1 labels.
        y_pred: Array-like of predicted probabilities, same number of
            elements as ``y_true``.

    Returns:
        The average negative log-likelihood as a NumPy float.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    # Keep probabilities strictly inside (0, 1) so log() never sees 0.
    epsilon = 1e-15
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    per_sample = y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)
    return -np.mean(per_sample)

def train_logistic_regression(X, y, lr=0.01, epochs=1000):
    """Fit a logistic-regression model with batch gradient descent.

    Args:
        X: 2-D array-like of shape ``(m, n)`` with one row per sample.
        y: Array-like of ``m`` binary labels; either ``(m,)`` or ``(m, 1)``.
        lr: Learning rate applied to every gradient step.
        epochs: Number of full-batch gradient steps.

    Returns:
        A tuple ``(theta, bias, loss_history)``: the ``(n, 1)`` weight
        vector, the scalar intercept, and a list holding the loss computed
        at the start of each epoch.

    Raises:
        ValueError: If ``y`` does not contain exactly one label per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    m, n = X.shape

    # Force labels into a column so `y_hat - y` stays (m, 1); a 1-D `y`
    # would otherwise broadcast the residual to (m, m).
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    if y.shape[0] != m:
        raise ValueError(
            f"X has {m} samples but y has {y.shape[0]} labels"
        )

    theta = np.zeros((n, 1))
    bias = 0.0
    loss_history = []

    for _ in range(epochs):
        # Forward pass: predicted probabilities for every sample.
        y_hat = sigmoid(np.dot(X, theta) + bias)
        loss_history.append(compute_loss(y, y_hat))

        # Gradient of the mean cross-entropy w.r.t. the linear output.
        dz = y_hat - y
        dw = np.dot(X.T, dz) / m
        db = np.sum(dz) / m

        theta -= lr * dw
        bias -= lr * db

    return theta, bias, loss_history


In [3]:
def predict(X, theta, bias, threshold=0.5):
    """Turn model outputs into hard 0/1 class labels.

    Args:
        X: Feature matrix passed to ``np.dot`` together with ``theta``.
        theta: Weight vector.
        bias: Intercept added to the linear scores.
        threshold: Probability at or above which a sample is labelled 1.

    Returns:
        Integer array of 0/1 predictions.
    """
    linear_scores = np.dot(X, theta) + bias
    is_positive = sigmoid(linear_scores) >= threshold
    return is_positive.astype(int)

def compute_f1_score(y_true, y_pred):
    """F1 score for binary labels where 1 is the positive class.

    Both inputs are flattened first so that a ``(m,)`` vector compared with
    a ``(m, 1)`` column is matched element by element rather than being
    broadcast to an ``(m, m)`` grid, which would inflate every count.

    Args:
        y_true: Array-like of ground-truth 0/1 labels.
        y_pred: Array-like of predicted 0/1 labels, same number of elements.

    Returns:
        The harmonic mean of precision and recall, or ``0.0`` when there
        are no true positives.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))

    # With no true positives, precision and/or recall is 0 or undefined;
    # returning early also avoids any division by zero below.
    if tp == 0:
        return 0.0

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * (precision * recall) / (precision + recall)



In [None]:
import pandas as pd
import numpy as np

# Work on a copy of the dataframe (`train_df` must already exist in the kernel)
df = train_df.copy()
df = df.drop(columns=["paciente_id"])         # 1. Drop the ID column
# 2. Encode gender as 1/0; any value other than "M"/"F" becomes NaN
df["genero"] = df["genero"].map({"M": 1, "F": 0})

X = df.drop(columns=["target"])
y = df["target"].values.reshape(-1, 1)

# 3. Standardize the numeric columns (every feature except 'genero')
numeric_cols = X.columns.tolist()
numeric_cols.remove("genero")

means = X[numeric_cols].mean()
# A constant column has std == 0; dividing by it would yield NaN (0/0).
# Using 1 instead leaves such a column at 0 after centering.
stds = X[numeric_cols].std().replace(0, 1.0)
X[numeric_cols] = (X[numeric_cols] - means) / stds

X_final = X.values  # X_final is defined again
