In [1]:
import numpy as np
import pickle

In [17]:
def load_cifar10_batch(filename):
    with open(filename, 'rb') as f:
        batch = pickle.load(f, encoding='bytes')
        data = batch[b'data']
        labels = np.array(batch[b'labels'])
        return data, labels

def load_cifar10_data():
    x_train, y_train = [], []
    for i in range(1, 6):
        data, labels = load_cifar10_batch(f'datasets/cifar-10-batches-py/data_batch_{i}')
        x_train.append(data)
        y_train.append(labels)

    x_train = np.concatenate(x_train)
    y_train = np.concatenate(y_train)

    x_test, y_test = load_cifar10_batch(f'datasets/cifar-10-batches-py/test_batch')

    x_train = x_train / 255.0
    x_test = x_test / 255.0

    return x_train, y_train, x_test, y_test

In [5]:
x_train, y_train, x_test, y_test = load_cifar10_data()

In [6]:
np.random.seed(42)

In [7]:
input_size = 3072  # 32x32x3 pixels
hidden_size1 = 128
hidden_size2 = 64
output_size = 10  # CIFAR-10 has 10 classes

In [8]:
weights = {
    "W1": np.random.randn(input_size, hidden_size1) * np.sqrt(2.0 / input_size),
    "b1": np.zeros((1, hidden_size1)),
    "W2": np.random.randn(hidden_size1, hidden_size2) * np.sqrt(2.0 / hidden_size1),
    "b2": np.zeros((1, hidden_size2)),
    "W3": np.random.randn(hidden_size2, output_size) * np.sqrt(2.0 / hidden_size2),
    "b3": np.zeros((1, output_size))
}

In [9]:
def relu(Z):
    return np.maximum(0, Z)

def softmax(Z):
    expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
    return expZ / np.sum(expZ, axis=1, keepdims=True)

In [10]:
def forward_propagation(X, weights):
    Z1 = np.dot(X, weights["W1"]) + weights["b1"]
    A1 = relu(Z1)

    Z2 = np.dot(A1, weights["W2"]) + weights["b2"]
    A2 = relu(Z2)

    Z3 = np.dot(A2, weights["W3"]) + weights["b3"]
    A3 = softmax(Z3)

    return Z1, A1, Z2, A2, Z3, A3

In [11]:
def compute_loss(Y_pred, Y_true, weights, lambda_=0.01):
    m = Y_true.shape[0]
    log_likelihood = -np.log(Y_pred[range(m), Y_true])
    loss = np.sum(log_likelihood) / m

    # L2 Regularization
    L2_regularization = (lambda_ / (2 * m)) * (
        np.sum(weights["W1"] ** 2) + np.sum(weights["W2"] ** 2) + np.sum(weights["W3"] ** 2)
    )
    return loss + L2_regularization

In [12]:
def backpropagation(X, Y_true, A1, A2, A3, weights, learning_rate, lambda_):
    m = X.shape[0]
    
    # One-hot encoding of labels
    Y_one_hot = np.zeros((m, output_size))
    Y_one_hot[np.arange(m), Y_true] = 1

    # Compute gradients
    dZ3 = A3 - Y_one_hot
    dW3 = (np.dot(A2.T, dZ3) + lambda_ * weights["W3"]) / m
    db3 = np.sum(dZ3, axis=0, keepdims=True) / m

    dA2 = np.dot(dZ3, weights["W3"].T)
    dZ2 = dA2 * (A2 > 0)  # ReLU derivative
    dW2 = (np.dot(A1.T, dZ2) + lambda_ * weights["W2"]) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m

    dA1 = np.dot(dZ2, weights["W2"].T)
    dZ1 = dA1 * (A1 > 0)  # ReLU derivative
    dW1 = (np.dot(X.T, dZ1) + lambda_ * weights["W1"]) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    # Update weights
    weights["W1"] -= learning_rate * dW1
    weights["b1"] -= learning_rate * db1
    weights["W2"] -= learning_rate * dW2
    weights["b2"] -= learning_rate * db2
    weights["W3"] -= learning_rate * dW3
    weights["b3"] -= learning_rate * db3

In [13]:
epochs = 50
batch_size = 64
lambda_ = 0.01 # L2 regularization factor

In [14]:
for epoch in range(epochs):
    shuffle_indices = np.random.permutation(x_train.shape[0])
    X_train, y_train = x_train[shuffle_indices], y_train[shuffle_indices]

    for i in range(0, X_train.shape[0], batch_size):
        X_batch = x_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]

        Z1, A1, Z2, A2, Z3, A3 = forward_propagation(X_batch, weights)

        learning_rate = 0.01 / (1 + 0.01 * epoch) # Learning rate decay
        backpropagation(X_batch, y_batch, A1, A2, A3, weights, learning_rate, lambda_)


    _, _, _, _, _, train_pred = forward_propagation(x_train, weights)
    train_loss = compute_loss(train_pred, y_train, weights, lambda_)

    print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}")
    

Epoch 1/50, Loss: 2.3072
Epoch 2/50, Loss: 2.3041
Epoch 3/50, Loss: 2.3025
Epoch 4/50, Loss: 2.3026
Epoch 5/50, Loss: 2.3027
Epoch 6/50, Loss: 2.3026
Epoch 7/50, Loss: 2.3025
Epoch 8/50, Loss: 2.3025
Epoch 9/50, Loss: 2.3026
Epoch 10/50, Loss: 2.3025
Epoch 11/50, Loss: 2.3025
Epoch 12/50, Loss: 2.3026
Epoch 13/50, Loss: 2.3026
Epoch 14/50, Loss: 2.3025
Epoch 15/50, Loss: 2.3026
Epoch 16/50, Loss: 2.3025
Epoch 17/50, Loss: 2.3026
Epoch 18/50, Loss: 2.3026
Epoch 19/50, Loss: 2.3025
Epoch 20/50, Loss: 2.3026
Epoch 21/50, Loss: 2.3026
Epoch 22/50, Loss: 2.3026
Epoch 23/50, Loss: 2.3026
Epoch 24/50, Loss: 2.3026
Epoch 25/50, Loss: 2.3026
Epoch 26/50, Loss: 2.3026
Epoch 27/50, Loss: 2.3026
Epoch 28/50, Loss: 2.3026
Epoch 29/50, Loss: 2.3026
Epoch 30/50, Loss: 2.3026
Epoch 31/50, Loss: 2.3026
Epoch 32/50, Loss: 2.3026
Epoch 33/50, Loss: 2.3026
Epoch 34/50, Loss: 2.3026
Epoch 35/50, Loss: 2.3026
Epoch 36/50, Loss: 2.3026
Epoch 37/50, Loss: 2.3026
Epoch 38/50, Loss: 2.3026
Epoch 39/50, Loss: 2.

In [15]:
def predict(X, weights):
    _, _, _, _, _, A3 = forward_propagation(X, weights)
    return np.argmax(A3, axis=1)

y_pred = predict(x_test, weights)
accuracy = np.mean(y_pred == y_test) * 100
print(f"Test Accuracy: {accuracy:.2f}%")


Test Accuracy: 10.00%


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 11ms/step - accuracy: 0.2756 - loss: 1.9934 - val_accuracy: 0.3098 - val_loss: 1.8702
Epoch 2/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.3756 - loss: 1.7550 - val_accuracy: 0.3846 - val_loss: 1.7112
Epoch 3/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.4091 - loss: 1.6555 - val_accuracy: 0.4148 - val_loss: 1.6583
Epoch 4/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - accuracy: 0.4273 - loss: 1.6060 - val_accuracy: 0.4304 - val_loss: 1.5980
Epoch 5/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.4397 - loss: 1.5774 - val_accuracy: 0.4370 - val_loss: 1.5572
Epoch 6/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.4507 - loss: 1.5334 - val_accuracy: 0.4494 - val_loss: 1.5412
Epoch 7/50
[1m704/704[0

In [2]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

def load_cifar10_batch(filename):
    with open(filename, 'rb') as f:
        batch = pickle.load(f, encoding='bytes')
        data = batch[b'data']
        labels = np.array(batch[b'labels'])
        return data, labels

def load_cifar10_data(base_path):
    x_train, y_train = [], []
    for i in range(1, 6):
        data, labels = load_cifar10_batch(os.path.join(base_path, f'data_batch_{i}'))
        x_train.append(data)
        y_train.append(labels)

    x_train = np.concatenate(x_train)
    y_train = np.concatenate(y_train)
    print(x_train, y_train)

    x_test, y_test = load_cifar10_batch(os.path.join(base_path, 'test_batch'))

    x_train = x_train / 255.0
    x_test = x_test / 255.0

    return x_train, y_train, x_test, y_test

# Load from local path
x_train, y_train, x_test, y_test = load_cifar10_data(r'datasets/cifar-10-batches-py')

# One-hot encode labels
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Build the model
model = models.Sequential([
    layers.Dense(128, activation='relu', input_shape=(3072,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Compile
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train
model.fit(x_train, y_train, epochs=50, batch_size=64, validation_split=0.1)
# Evaluate
loss, acc = model.evaluate(x_test, y_test)
print(f'Test Accuracy: {acc * 100:.2f}%')




[[ 59  43  50 ... 140  84  72]
 [154 126 105 ... 139 142 144]
 [255 253 253 ...  83  83  84]
 ...
 [ 35  40  42 ...  77  66  50]
 [189 186 185 ... 169 171 171]
 [229 236 234 ... 173 162 161]] [6 9 9 ... 9 1 1]
Epoch 1/50
[1m648/704[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 4ms/step - accuracy: 0.2616 - loss: 2.0362

KeyboardInterrupt: 