In [41]:
import numpy as np
import keras

# Cargar CIFAR-10 desde keras
(Xtr, Ytr), (Xte, Yte) = keras.datasets.cifar10.load_data()
Ytr = Ytr.flatten()
Yte = Yte.flatten()

# Preprocesamiento: aplanar y agregar bias
Xtr_flat = Xtr.reshape(Xtr.shape[0], -1)
Xtr_bias = np.hstack([Xtr_flat, np.ones((Xtr_flat.shape[0], 1))])
X = Xtr_bias.T  # 3073 x 50000
y = Ytr  # ya aplanado

# Inicialización de pesos
np.random.seed(0)
W = 0.001 * np.random.randn(10, X.shape[0])

In [42]:
def L_i(x, y, W):
    delta = 1.0
    scores = W.dot(x)
    correct_class_score = scores[y]
    D = W.shape[0]
    loss_i = 0.0
    for j in range(D):
        if j == y:
            continue
        loss_i += max(0, scores[j] - correct_class_score + delta)
    return loss_i

def L_i_vectorized(x, y, W):
    delta = 1.0
    scores = W.dot(x)
    margins = np.maximum(0, scores - scores[y] + delta)
    margins[y] = 0
    return np.sum(margins)

def L(X, y, W):
    delta = 1.0
    scores = W.dot(X)  # shape: (10, N)
    correct_scores = scores[y, np.arange(X.shape[1])]  # shape: (N,)
    margins = np.maximum(0, scores - correct_scores + delta)
    margins[y, np.arange(X.shape[1])] = 0
    return np.sum(margins)

In [43]:
# Ejemplo con un solo dato (no vectorizado)
i = 0
x_i = X[:, i]
y_i = y[i]
loss1 = L_i(x_i, y_i, W)
loss2 = L_i_vectorized(x_i, y_i, W)

print(f"L_i: {loss1:.4f}")
print(f"L_i_vectorized: {loss2:.4f}")

# Versión completamente vectorizada
total_loss = L(X[:, :1000], y[:1000], W)
print(f"Total vectorized loss (1000 examples): {total_loss:.4f}")

L_i: 12.3610
L_i_vectorized: 12.3610
Total vectorized loss (1000 examples): 53188.5249


In [44]:
# Problema numérico: grandes exponentes
f = np.array([123, 456, 789])
p_bad = np.exp(f) / np.sum(np.exp(f))  # puede dar overflow

# Solución: restar el máximo
f_stable = f - np.max(f)
p_good = np.exp(f_stable) / np.sum(np.exp(f_stable))
print("Softmax estable:", p_good)

Softmax estable: [5.75274406e-290 2.39848787e-145 1.00000000e+000]


  p_bad = np.exp(f) / np.sum(np.exp(f))  # puede dar overflow
  p_bad = np.exp(f) / np.sum(np.exp(f))  # puede dar overflow
