In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

# Cargar el dataset de rendimiento estudiantil
df = pd.read_csv("exams.csv")

# Crear variable objetivo binaria: aprobó matemáticas io no
df['pass_math'] = (df['math score'] >= 60).astype(int)

# One-Hot Encoding de variables categóricas
df = pd.get_dummies(df, columns=[
    'gender', 'race/ethnicity', 'parental level of education',
    'lunch', 'test preparation course'
], drop_first=True)

# Separar variables de entrada y salida
X = df.drop(columns=['math score', 'pass_math'])
y = df['pass_math']

# Normalizar puntajes de lectura y escritura
scaler = StandardScaler()
X[['reading score', 'writing score']] = scaler.fit_transform(X[['reading score', 'writing score']])

# Dividir datos en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Modelo de Regresión Logística
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
y_pred_log = log_reg.predict(X_test)
acc_log = accuracy_score(y_test, y_pred_log)
print(f"Accuracy regresión logística: {acc_log:.4f}")

# Preparar tensores para PyTorch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train_t = torch.tensor(X_train.values, dtype=torch.float32).to(device)
X_test_t = torch.tensor(X_test.values, dtype=torch.float32).to(device)
y_train_t = torch.tensor(y_train.values, dtype=torch.float32).to(device)
y_test_t = torch.tensor(y_test.values, dtype=torch.float32).to(device)

# Modelo Red Neuronal
class BinaryClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(BinaryClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x

model = BinaryClassifier(input_dim=X_train.shape[1], hidden_dim=64).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Entrenamiento
epochs = 100
losses = []
accuracies_nn = []

for epoch in range(epochs):
    model.train()
    y_pred = model(X_train_t).squeeze()
    loss = criterion(y_pred, y_train_t)
    losses.append(loss.item())

    y_pred_label = (y_pred > 0.5).float()
    accuracy_epoch = (y_pred_label == y_train_t).float().mean().item()
    accuracies_nn.append(accuracy_epoch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 1000 == 0:
        print(f"Epoch {epoch}/{epochs} - Loss: {loss.item():.4f}")

# Evaluación final en test
torch.no_grad()
model.eval()
y_pred_test = model(X_test_t).squeeze()
y_pred_label = (y_pred_test > 0.5).float()
accuracy = (y_pred_label == y_test_t).float().mean().item()

print(f"Precisión en el conjunto de prueba (Red Neuronal): {accuracy:.4f}")

# Comparación de Modelos
print("\nComparación de Modelos:")
print(f"Regresión Logística → Accuracy: {acc_log:.4f}")
print(f"Red Neuronal → Accuracy: {accuracy:.4f}")

# Gráficas
plt.plot(losses)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Pérdida durante el entrenamiento")
plt.grid(True)
plt.show()

plt.plot(accuracies_nn, label="Precisión", color='green')
plt.xlabel("Epochs")
plt.ylabel("Precisión")
plt.title("Precisión durante el entrenamiento de la Red Neuronal")
plt.grid(True)
plt.legend()
plt.show()

# Comparación
model_names = ['Regresión Logística', 'Red Neuronal']
accuracies = [acc_log, accuracy]

plt.figure(figsize=(6,4))
plt.bar(model_names, accuracies, color=['blue', 'green'])
plt.ylim(0.5, 1.0)
plt.title("Comparación de Precisión entre Modelos")
plt.ylabel("Precisión")
for i, acc in enumerate(accuracies):
    plt.text(i, acc + 0.02, f"{acc:.4f}", ha='center', fontsize=12)
plt.grid(axis='y')
plt.show()

# Gráfica combinada
fig, ax1 = plt.subplots(figsize=(10,5))

color = 'tab:red'
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss', color=color)
ax1.plot(losses, color=color, label='Pérdida')
ax1.tick_params(axis='y', labelcolor=color)
ax1.grid(True)

ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel('Accuracy', color=color)
ax2.plot(accuracies_nn, color=color, label='Precisión')
ax2.tick_params(axis='y', labelcolor=color)

plt.title("Pérdida y Precisión durante el Entrenamiento de la Red Neuronal")
fig.tight_layout()
plt.show()


Accuracy regresión logística: 0.9200


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.