###Strategy 1: A first very bad idea solution: Random search

In [2]:
import numpy as np


In [6]:
# Función de pérdida de ejemplo (softmax + cross-entropy)
def L(X, y, W):
    scores = np.dot(W, X)
    scores -= np.max(scores, axis=0, keepdims=True)
    exp_scores = np.exp(scores)
    probs = exp_scores / np.sum(exp_scores, axis=0, keepdims=True)

    N = X.shape[1]
    correct_logprobs = -np.log(probs[y, range(N)])
    data_loss = np.sum(correct_logprobs) / N
    return data_loss

# assume X_train is the data where each column is an example (e.g. 3073 x 50,000)
# assume Y_train are the labels (e.g. 1D array of 50,000)
# assume the function L evaluates the loss function
X_train = np.random.randn(3073, 50000)
Y_train = np.random.randint(0, 10, 50000)

bestloss = float("inf") # Python assigns the highest possible float value
for num in range(100):
  W = np.random.randn(10, 3073) * 0.0001 # generate random parameters
  loss = L(X_train, Y_train, W) # get the loss over the entire training set
  if loss < bestloss: # keep track of the best solution
    bestloss = loss
    bestW = W
  print('In attempt %d the loss was %f, best %f' % (num, loss, bestloss))


In attempt 0 the loss was 2.302625, best 2.302625
In attempt 1 the loss was 2.302580, best 2.302580
In attempt 2 the loss was 2.302569, best 2.302569
In attempt 3 the loss was 2.302565, best 2.302565
In attempt 4 the loss was 2.302617, best 2.302565
In attempt 5 the loss was 2.302635, best 2.302565
In attempt 6 the loss was 2.302587, best 2.302565
In attempt 7 the loss was 2.302618, best 2.302565
In attempt 8 the loss was 2.302594, best 2.302565
In attempt 9 the loss was 2.302609, best 2.302565
In attempt 10 the loss was 2.302614, best 2.302565
In attempt 11 the loss was 2.302603, best 2.302565
In attempt 12 the loss was 2.302584, best 2.302565
In attempt 13 the loss was 2.302606, best 2.302565
In attempt 14 the loss was 2.302603, best 2.302565
In attempt 15 the loss was 2.302604, best 2.302565
In attempt 16 the loss was 2.302610, best 2.302565
In attempt 17 the loss was 2.302577, best 2.302565
In attempt 18 the loss was 2.302620, best 2.302565
In attempt 19 the loss was 2.302587, best

In [7]:
# Simula datos de entrenamiento y prueba
X_train = np.random.randn(3073, 50000)
Y_train = np.random.randint(0, 10, 50000)

X_test = np.random.randn(3073, 10000)
Y_test = np.random.randint(0, 10, 10000)

# Función de pérdida
def L(X, y, W):
    scores = np.dot(W, X)
    scores -= np.max(scores, axis=0, keepdims=True)
    exp_scores = np.exp(scores)
    probs = exp_scores / np.sum(exp_scores, axis=0, keepdims=True)
    N = X.shape[1]
    correct_logprobs = -np.log(probs[y, range(N)])
    loss = np.sum(correct_logprobs) / N
    return loss

# Búsqueda aleatoria de mejores pesos
bestloss = float("inf")
for num in range(100):
    W = np.random.randn(10, 3073) * 0.0001
    loss = L(X_train, Y_train, W)
    if loss < bestloss:
        bestloss = loss
        bestW = W
    print(f"In attempt {num} the loss was {loss:.6f}, best {bestloss:.6f}")

# Evaluación en test set
scores = bestW.dot(X_test)
Y_pred = np.argmax(scores, axis=0)
accuracy = np.mean(Y_pred == Y_test)
print(f"Test accuracy: {accuracy:.4f}")


In attempt 0 the loss was 2.302579, best 2.302579
In attempt 1 the loss was 2.302616, best 2.302579
In attempt 2 the loss was 2.302595, best 2.302579
In attempt 3 the loss was 2.302593, best 2.302579
In attempt 4 the loss was 2.302602, best 2.302579
In attempt 5 the loss was 2.302594, best 2.302579
In attempt 6 the loss was 2.302631, best 2.302579
In attempt 7 the loss was 2.302580, best 2.302579
In attempt 8 the loss was 2.302592, best 2.302579
In attempt 9 the loss was 2.302604, best 2.302579
In attempt 10 the loss was 2.302633, best 2.302579
In attempt 11 the loss was 2.302605, best 2.302579
In attempt 12 the loss was 2.302588, best 2.302579
In attempt 13 the loss was 2.302586, best 2.302579
In attempt 14 the loss was 2.302609, best 2.302579
In attempt 15 the loss was 2.302625, best 2.302579
In attempt 16 the loss was 2.302653, best 2.302579
In attempt 17 the loss was 2.302564, best 2.302564
In attempt 18 the loss was 2.302613, best 2.302564
In attempt 19 the loss was 2.302623, best

### Strategy 2: Random Local Search

In [8]:
# Simulación de datos de entrenamiento (si no tienes datos reales)
X_train = np.random.randn(3073, 50000)        # (D, N)
Y_train = np.random.randint(0, 10, 50000)     # (N,)

# Función de pérdida softmax + cross-entropy
def L(X, y, W):
    scores = np.dot(W, X)
    scores -= np.max(scores, axis=0, keepdims=True)
    exp_scores = np.exp(scores)
    probs = exp_scores / np.sum(exp_scores, axis=0, keepdims=True)
    N = X.shape[1]
    correct_logprobs = -np.log(probs[y, range(N)])
    loss = np.sum(correct_logprobs) / N
    return loss

# Estrategia #2: Búsqueda local aleatoria
W = np.random.randn(10, 3073) * 0.001   # pesos iniciales
bestloss = float("inf")

step_size = 0.0001

for i in range(100):
    Wtry = W + np.random.randn(10, 3073) * step_size
    loss = L(X_train, Y_train, Wtry)

    if loss < bestloss:
        W = Wtry
        bestloss = loss

    print(f"Iter {i}, Best loss: {bestloss:.6f}")


Iter 0, Best loss: 2.303958
Iter 1, Best loss: 2.303958
Iter 2, Best loss: 2.303958
Iter 3, Best loss: 2.303935
Iter 4, Best loss: 2.303935
Iter 5, Best loss: 2.303935
Iter 6, Best loss: 2.303935
Iter 7, Best loss: 2.303928
Iter 8, Best loss: 2.303920
Iter 9, Best loss: 2.303919
Iter 10, Best loss: 2.303916
Iter 11, Best loss: 2.303916
Iter 12, Best loss: 2.303895
Iter 13, Best loss: 2.303881
Iter 14, Best loss: 2.303857
Iter 15, Best loss: 2.303853
Iter 16, Best loss: 2.303848
Iter 17, Best loss: 2.303848
Iter 18, Best loss: 2.303848
Iter 19, Best loss: 2.303835
Iter 20, Best loss: 2.303835
Iter 21, Best loss: 2.303835
Iter 22, Best loss: 2.303808
Iter 23, Best loss: 2.303808
Iter 24, Best loss: 2.303808
Iter 25, Best loss: 2.303808
Iter 26, Best loss: 2.303806
Iter 27, Best loss: 2.303806
Iter 28, Best loss: 2.303785
Iter 29, Best loss: 2.303785
Iter 30, Best loss: 2.303767
Iter 31, Best loss: 2.303767
Iter 32, Best loss: 2.303761
Iter 33, Best loss: 2.303761
Iter 34, Best loss: 2.30

###Strategy 3: Following the Gradient

In [9]:
def eval_numerical_gradient(f, x):
  """
  a naive implementation of numerical gradient of f at x
  - f should be a function that takes a single argument
  - x is the point (numpy array) to evaluate the gradient at
  """

  fx = f(x) # evaluate function value at original point
  grad = np.zeros(x.shape)
  h = 0.00001

  # iterate over all indexes in x
  it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
  while not it.finished:

    # evaluate function at x+h
    ix = it.multi_index
    old_value = x[ix]
    x[ix] = old_value + h # increment by h
    fxh = f(x) # evalute f(x + h)
    x[ix] = old_value # restore to previous value (very important!)

    # compute the partial derivative
    grad[ix] = (fxh - fx) / h # the slope
    it.iternext() # step to next dimension

  return grad

In [10]:
# to use the generic code above we want a function that takes a single argument
# (the weights in our case) so we close over X_train and Y_train
def CIFAR10_loss_fun(W, batch_size=1000):
  indices = np.random.choice(X_train.shape[1], 1000, replace=False)
  X_batch = X_train[:, indices] #mini-batch
  Y_batch = Y_train[indices]  #mini-batch
  return L(X_train, Y_train, W)

W = np.random.rand(10, 3073) * 0.001 # random weight vector
#df = eval_numerical_gradient(CIFAR10_loss_fun, W) # get the gradient
batch_size = 1000
fixed_indices = np.random.choice(X_train.shape[1], batch_size, replace=False)
X_batch_fixed = X_train[:, fixed_indices]
Y_batch_fixed = Y_train[fixed_indices]

def CIFAR10_loss_fun_fixed(W):
    return L(X_batch_fixed, Y_batch_fixed, W)

# Ahora sí, calcula el gradiente numérico con el mini-batch fijo
df = eval_numerical_gradient(CIFAR10_loss_fun_fixed, W)

In [11]:
loss_original = CIFAR10_loss_fun(W, batch_size=1000) # the original loss
print ('original loss: %f' % (loss_original, ))

# lets see the effect of multiple step sizes
for step_size_log in [-10, -9, -8, -7, -6, -5,-4,-3,-2,-1]:
  step_size = 10 ** step_size_log
  W_new = W - step_size * df # new position in the weight space
  loss_new = CIFAR10_loss_fun(W_new)
  print ('for step size %f new loss: %f' % (step_size, loss_new))

original loss: 2.302689
for step size 0.000000 new loss: 2.302689
for step size 0.000000 new loss: 2.302689
for step size 0.000000 new loss: 2.302689
for step size 0.000000 new loss: 2.302689
for step size 0.000001 new loss: 2.302688
for step size 0.000010 new loss: 2.302688
for step size 0.000100 new loss: 2.302683
for step size 0.001000 new loss: 2.302632
for step size 0.010000 new loss: 2.302137
for step size 0.100000 new loss: 2.298517


In [19]:
# Vanilla Minibatch Gradient Descent

#while True:
  #weights_grad = evaluate_gradient(loss_fun, data_batch, weights)
  #weights += - step_size * weights_grad # perform parameter update

import numpy as np

# Supón que ya tienes tus datos en estas variables:
# X_train: (3073, 50000)
# Y_train: (50000,)
# X_test, Y_test igual forma

def softmax_loss(X, y, W):
    N = X.shape[1]
    scores = W @ X
    scores -= np.max(scores, axis=0, keepdims=True)
    probs = np.exp(scores) / np.sum(np.exp(scores), axis=0, keepdims=True)

    loss = -np.sum(np.log(probs[y, range(N)])) / N

    dscores = probs.copy()
    dscores[y, range(N)] -= 1
    dscores /= N

    dW = dscores @ X.T
    return loss, dW

# Inicializar pesos
W = 0.001 * np.random.randn(10, 3073)
step_size = 1e-7
num_iters = 1000
batch_size = 1000

for i in range(num_iters):
    # Seleccionar mini-batch aleatorio
    indices = np.random.choice(X_train.shape[1], batch_size, replace=False)
    X_batch = X_train[:, indices]
    Y_batch = Y_train[indices]

    # Calcular pérdida y gradiente
    loss, grad = softmax_loss(X_batch, Y_batch, W)

    # Actualizar pesos
    W -= step_size * grad

    # Mostrar progreso
    if i % 100 == 0:
        print(f"Iteración {i}: loss = {loss:.4f}")

# Evaluar en test set
scores = W @ X_test
Y_pred = np.argmax(scores, axis=0)
acc = np.mean(Y_pred == Y_test)
print(f"Precisión en test: {acc:.4f}")


Iteración 0: loss = 2.3019
Iteración 100: loss = 2.3058
Iteración 200: loss = 2.3038
Iteración 300: loss = 2.3086
Iteración 400: loss = 2.3011
Iteración 500: loss = 2.3051
Iteración 600: loss = 2.3026
Iteración 700: loss = 2.3054
Iteración 800: loss = 2.3041
Iteración 900: loss = 2.3020
Precisión en test: 0.1009


In [21]:
# Vanilla Minibatch Gradient Descent

#while True:
  #data_batch = sample_training_data(data, 256) # sample 256 examples
  #weights_grad = evaluate_gradient(loss_fun, data_batch, weights)
  #weights += - step_size * weights_grad # perform parameter update

import numpy as np

# Supón que tienes X_train (3073, N), Y_train (N,)
# También tienes X_test, Y_test para evaluación

def softmax_loss(X, y, W):
    N = X.shape[1]
    scores = W @ X
    scores -= np.max(scores, axis=0, keepdims=True)

    probs = np.exp(scores) / np.sum(np.exp(scores), axis=0, keepdims=True)
    loss = -np.sum(np.log(probs[y, range(N)])) / N

    dscores = probs
    dscores[y, range(N)] -= 1
    dscores /= N

    dW = dscores @ X.T
    return loss, dW

# Inicialización
W = 0.001 * np.random.randn(10, 3073)
step_size = 1e-7
num_iters = 1000
batch_size = 256

for i in range(num_iters):
    # Mini-batch aleatorio
    indices = np.random.choice(X_train.shape[1], batch_size, replace=False)
    X_batch = X_train[:, indices]
    Y_batch = Y_train[indices]

    # Calcular pérdida y gradiente
    loss, grad = softmax_loss(X_batch, Y_batch, W)

    # Actualización de pesos
    W -= step_size * grad

    if i % 100 == 0:
        print(f"Iter {i}: loss = {loss:.4f}")


Iter 0: loss = 2.3061
Iter 100: loss = 2.3009
Iter 200: loss = 2.3063
Iter 300: loss = 2.3005
Iter 400: loss = 2.2941
Iter 500: loss = 2.3074
Iter 600: loss = 2.3063
Iter 700: loss = 2.2973
Iter 800: loss = 2.3012
Iter 900: loss = 2.3063


In [22]:
scores_test = W @ X_test
Y_pred = np.argmax(scores_test, axis=0)
acc = np.mean(Y_pred == Y_test)
print(f"Test accuracy: {acc:.4f}")


Test accuracy: 0.0993
