In [1]:
!pip uninstall numpy

Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Would remove:
    /usr/local/bin/f2py
    /usr/local/bin/numpy-config
    /usr/local/lib/python3.11/dist-packages/numpy-2.0.2.dist-info/*
    /usr/local/lib/python3.11/dist-packages/numpy.libs/libgfortran-040039e1-0352e75f.so.5.0.0
    /usr/local/lib/python3.11/dist-packages/numpy.libs/libquadmath-96973f99-934c22de.so.0.0.0
    /usr/local/lib/python3.11/dist-packages/numpy.libs/libscipy_openblas64_-99b71e71.so
    /usr/local/lib/python3.11/dist-packages/numpy/*
Proceed (Y/n)? y
  Successfully uninstalled numpy-2.0.2


In [2]:
!pip install numpy==1.25.1

Collecting numpy==1.25.1
  Downloading numpy-1.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Downloading numpy-1.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
blosc2 3.3.2 requires numpy>=1.26, but you have numpy 1.25.1 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.25.1 which is incompatible.
treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.25.1 which is incompatible.
tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 1.25.1 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-1.25.1


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import torch.nn.init as init
import numpy as np
import os

# 1 HL NN

In [87]:
# === Config ===
hidden_size = 2
epochs = 10
batch_size = 128

# === Data ===
transform = transforms.Compose([
    transforms.ToTensor(),  # Converts to float32 in [0, 1]
    transforms.Normalize((0.1307,), (0.3081,)),  # Normalize with MNIST mean and std
    transforms.Lambda(lambda x: x / 100),  # Divide by 100 after normalization
    transforms.Lambda(lambda x: x.view(-1).double())  # Flatten and convert to float64
])

train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_data  = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Filter only digits 0 and 1
train_idx = [i for i, (_, y) in enumerate(train_data) if y in [0, 1]]
test_idx  = [i for i, (_, y) in enumerate(test_data)  if y in [0, 1]]

train_loader = DataLoader(Subset(train_data, train_idx), batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(Subset(test_data,  test_idx),  batch_size=batch_size)

# === Model ===
class MyModel(nn.Module):
    def __init__(self, hidden_size):
        super(MyModel, self).__init__()
        self.fc1 = nn.Linear(784, hidden_size, dtype=torch.float64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, 1, dtype=torch.float64)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

model = MyModel(hidden_size).double()  # instantiate with hidden_size

# === Training ===
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(epochs):
    model.train()
    for xb, yb in train_loader:
        xb = xb.double()
        yb = yb.double().unsqueeze(1)  # (batch_size, 1)
        out = model(xb)
        loss = criterion(out, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1} - Loss: {loss.item():.4f}")

# === Evaluation ===
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.double()
        out = model(xb)
        pred = (out > 0).int().squeeze()
        correct += (pred == yb.int()).sum().item()
        total += yb.size(0)

print(f"Hard-label accuracy: {100 * correct / total:.2f}%")

# === Save weights & biases ===
weights = [
    model.fc1.weight.detach().cpu().numpy(),        # shape: (2, 784)
    model.fc2.weight.detach().cpu().numpy()         # shape: (1, 2)
]
biases = [
    model.fc1.bias.detach().cpu().numpy().reshape(-1, 1),  # shape: (2, 1)
    model.fc2.bias.detach().cpu().numpy().reshape(-1, 1)   # shape: (1, 1)
]

Epoch 1 - Loss: 0.6615
Epoch 2 - Loss: 0.6205
Epoch 3 - Loss: 0.5212
Epoch 4 - Loss: 0.4874
Epoch 5 - Loss: 0.3988
Epoch 6 - Loss: 0.3411
Epoch 7 - Loss: 0.2797
Epoch 8 - Loss: 0.2649
Epoch 9 - Loss: 0.2725
Epoch 10 - Loss: 0.2047
Hard-label accuracy: 99.67%


In [83]:
print(weights)

[array([[ 0.03100247, -0.00498552,  0.02210274, ...,  0.0108135 ,
        -0.02854328, -0.00452127],
       [-0.54996544, -0.49725528, -0.50737784, ..., -0.5460584 ,
        -0.52039397, -0.49743261]]), array([[0.45798796, 1.34748298]])]


In [86]:
# Save as .npz with arr_0 (weights), arr_1 (biases)
save_path = "./relu_mnist_hidden2_try_2.npz"
np.savez(save_path,
         arr_0=np.array(weights, dtype=object),
         arr_1=np.array(biases, dtype=object))

print(f"Model saved to: {os.path.abspath(save_path)}")

Model saved to: /content/relu_mnist_hidden2_try_2.npz


#Two Classes

In [88]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np

# === Config ===
hidden_size = 2
output_size = 2   # softmax outputs before collapsing
epochs = 10
batch_size = 128
model_path = "relu_2_classes.npz"  # save filename

# === Data ===
transform = transforms.Compose([
    transforms.ToTensor(),  # Converts to float32 in [0, 1]
    transforms.Normalize((0.1307,), (0.3081,)),  # Normalize with MNIST mean and std
    transforms.Lambda(lambda x: x / 100),  # Divide by 100 after normalization
    transforms.Lambda(lambda x: x.view(-1).double())  # Flatten and convert to float64
])

train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_data  = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Filter only digits 0 and 1
train_idx = [i for i, (_, y) in enumerate(train_data) if y in [0, 1]]
test_idx  = [i for i, (_, y) in enumerate(test_data)  if y in [0, 1]]

train_loader = DataLoader(Subset(train_data, train_idx), batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(Subset(test_data,  test_idx),  batch_size=batch_size)

# === Model ===
class TwoOutputNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, hidden_size, dtype=torch.float64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size, dtype=torch.float64)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)  # raw logits
        return x

model = TwoOutputNN().double()  # ensure all params are float64

# === Training ===
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(epochs):
    model.train()
    for xb, yb in train_loader:
        xb = xb.double()        # cast inputs to float64
        yb = yb.long()          # for CrossEntropyLoss
        out = model(xb)
        loss = criterion(out, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1} - Loss: {loss.item():.4f}")

# === Evaluation ===
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.double()
        out = model(xb)
        pred = torch.argmax(out, dim=1)
        correct += (pred == yb).sum().item()
        total += yb.size(0)

print(f"Hard-label accuracy: {100 * correct / total:.2f}%")

# === Collapse output: class1 - class0 ===
fc2_weight = model.fc2.weight.detach().cpu().numpy()       # shape: (2, 2)
fc2_bias   = model.fc2.bias.detach().cpu().numpy().reshape(-1, 1)  # shape: (2, 1)

# Compute final collapsed layer: w = w1 - w0, b = b1 - b0
collapsed_w = (fc2_weight[1] - fc2_weight[0]).reshape(1, hidden_size)  # (1, 2)
collapsed_b = (fc2_bias[1]   - fc2_bias[0]).reshape(1, 1)              # (1, 1)

# === Save as [784 → 2 → 1] model ===
weights = [
    model.fc1.weight.detach().cpu().numpy(),  # shape: (2, 784)
    collapsed_w                               # shape: (1, 2)
]
biases = [
    model.fc1.bias.detach().cpu().numpy().reshape(-1, 1),  # shape: (2, 1)
    collapsed_b                                            # shape: (1, 1)
]

np.savez(model_path,
         arr_0=np.array(weights, dtype=object),
         arr_1=np.array(biases, dtype=object))

print(f"Model saved to {model_path}")


Epoch 1 - Loss: 0.7143
Epoch 2 - Loss: 0.6940
Epoch 3 - Loss: 0.6587
Epoch 4 - Loss: 0.5763
Epoch 5 - Loss: 0.4445
Epoch 6 - Loss: 0.3881
Epoch 7 - Loss: 0.3625
Epoch 8 - Loss: 0.2573
Epoch 9 - Loss: 0.2493
Epoch 10 - Loss: 0.2194
Hard-label accuracy: 99.34%
✅ Model saved to relu_2_classes.npz


In [None]:
fcn = np.load('/content/2_classes_784_2_1.npz', allow_pickle=True)
weights, biases = fcn['ws'], fcn['bs']

In [None]:
w_off = weights[1]

In [None]:
b_off = biases[1]

In [None]:
import numpy as np

# Dimensions
d_in = 2     # example input dimension
d_hidden = 1   # number of hidden units

# Initialize w1 and b1 from N(0, 1)
w1 = np.random.normal(loc=0.0, scale=1.0, size=(d_hidden, d_in))
b1 = np.random.normal(loc=0.0, scale=1.0, size=(d_hidden, 1))

print(w1)


[[-0.27924659  0.36512713]]


In [None]:
w2 = w1 + w_off
print(w2)

[[-0.77700951 -0.13263571]]


In [None]:
b2 = b1 + b_off

In [None]:
weight_2nd_layer = np.vstack([w1, w2])

In [None]:
biases_2nd_layer = np.vstack([b1, b2])

In [None]:
print(biases_2nd_layer)

[[-0.90644664]
 [-0.7248949 ]]


In [None]:
print

In [None]:
wights_1st_layer = weights[0]
biases_1st_layer = biases[0]

In [None]:
model_extract = TwoOutputNN()

model_extract.fc1.weight.data = torch.from_numpy(wights_1st_layer).double()
model_extract.fc1.bias.data   = torch.from_numpy(biases_1st_layer.reshape(-1)).double()

model_extract.fc2.weight.data = torch.from_numpy(weight_2nd_layer).double()
model_extract.fc2.bias.data   = torch.from_numpy(biases_2nd_layer.reshape(-1)).double()

In [None]:
print("fc1.weight.shape:", model_extract.fc1.weight.data.shape, "(expected:", (hidden_size, 784), ")")
print("fc1.bias.shape:  ", model_extract.fc1.bias.data.shape,   "(expected:", (hidden_size,), ")")

print("fc2.weight.shape:", model_extract.fc2.weight.data.shape, "(expected:", (output_size, hidden_size), ")")
print("fc2.bias.shape:  ", model_extract.fc2.bias.data.shape,   "(expected:", (output_size,), ")")

fc1.weight.shape: torch.Size([2, 784]) (expected: (2, 784) )
fc1.bias.shape:   torch.Size([2]) (expected: (2,) )
fc2.weight.shape: torch.Size([2, 2]) (expected: (2, 2) )
fc2.bias.shape:   torch.Size([2]) (expected: (2,) )


In [None]:
# === Evaluation ===
model_extract.eval()
correct, total = 0, 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.double()
        out = model_extract(xb)
        pred = torch.argmax(out, dim=1)
        correct += (pred == yb).sum().item()
        total += yb.size(0)

print(f"Hard-label accuracy: {100 * correct / total:.2f}%")

✅ Hard-label accuracy: 99.67%


# Three Classes

In [89]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np

# === Config ===
hidden_size = 2
output_size = 3  # 3 classes: 0, 1, 3 mapped to 0, 1, 2
epochs = 10
batch_size = 128

# === Label mapping function ===
def remap_targets(dataset, label_map):
    targets = np.array(dataset.targets)
    mapped_indices = np.where(np.isin(targets, list(label_map.keys())))[0]
    dataset.targets = torch.tensor([label_map[int(y)] for y in targets[mapped_indices]])
    dataset.data = dataset.data[mapped_indices]
    return dataset

# === Transform ===
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
    transforms.Lambda(lambda x: x / 100),
    transforms.Lambda(lambda x: x.view(-1).double())
])

# === Load Data ===
label_map = {0: 0, 1: 1, 3: 2}  # map original labels to 0-based
train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_data  = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_data = remap_targets(train_data, label_map)
test_data  = remap_targets(test_data, label_map)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_data, batch_size=batch_size)

# === Model ===
class TwoOutputNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, hidden_size, dtype=torch.float64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size, dtype=torch.float64)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = TwoOutputNN().double()

# === Training ===
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(epochs):
    model.train()
    for xb, yb in train_loader:
        xb = xb.double()
        yb = yb.long()
        out = model(xb)
        loss = criterion(out, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1} - Loss: {loss.item():.4f}")

# === Evaluation ===
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.double()
        out = model(xb)
        pred = torch.argmax(out, dim=1)
        correct += (pred == yb).sum().item()
        total += yb.size(0)

print(f"Hard-label accuracy: {100 * correct / total:.2f}%")

Epoch 1 - Loss: 1.0964
Epoch 2 - Loss: 0.9889
Epoch 3 - Loss: 0.8386
Epoch 4 - Loss: 0.7846
Epoch 5 - Loss: 0.7155
Epoch 6 - Loss: 0.6398
Epoch 7 - Loss: 0.6501
Epoch 8 - Loss: 0.5374
Epoch 9 - Loss: 0.5722
Epoch 10 - Loss: 0.4953
Hard-label accuracy: 73.15%


In [None]:
print(fc2_weight.shape)

(3, 2)


In [90]:
# === Collapse output: class1 - class0 ===
fc2_weight = model.fc2.weight.detach().cpu().numpy()       # shape: (2, 2)
fc2_bias   = model.fc2.bias.detach().cpu().numpy().reshape(-1, 1)  # shape: (2, 1)

# Compute final collapsed layer: w = w1 - w0, b = b1 - b0
collapsed_w = (fc2_weight[0] - fc2_weight[1]).reshape(1, hidden_size)  # (1, 2)
collapsed_b = (fc2_bias[0]   - fc2_bias[1]).reshape(1, 1)              # (1, 1)

# === Save as [784 → 2 → 1] model ===
weights = [
    model.fc1.weight.detach().cpu().numpy(),  # shape: (2, 784)
    collapsed_w                               # shape: (1, 2)
]
biases = [
    model.fc1.bias.detach().cpu().numpy().reshape(-1, 1),  # shape: (2, 1)
    collapsed_b                                            # shape: (1, 1)
]

np.savez('class1-2_3classes',
         arr_0=np.array(weights, dtype=object),
         arr_1=np.array(biases, dtype=object))

print(f"Model saved to class1-2_3classes")

Model saved to class1-2_3classes


In [91]:
# === Collapse output: class1 - class0 ===
fc2_weight = model.fc2.weight.detach().cpu().numpy()       # shape: (2, 2)
fc2_bias   = model.fc2.bias.detach().cpu().numpy().reshape(-1, 1)  # shape: (2, 1)

# Compute final collapsed layer: w = w1 - w0, b = b1 - b0
collapsed_w = (fc2_weight[1] - fc2_weight[2]).reshape(1, hidden_size)  # (1, 2)
collapsed_b = (fc2_bias[1]   - fc2_bias[2]).reshape(1, 1)              # (1, 1)

# === Save as [784 → 2 → 1] model ===
weights = [
    model.fc1.weight.detach().cpu().numpy(),  # shape: (2, 784)
    collapsed_w                               # shape: (1, 2)
]
biases = [
    model.fc1.bias.detach().cpu().numpy().reshape(-1, 1),  # shape: (2, 1)
    collapsed_b                                            # shape: (1, 1)
]

np.savez('class2-3_3classes',
         arr_0=np.array(weights, dtype=object),
         arr_1=np.array(biases, dtype=object))

print(f"Model saved to class2-3_3classes")

Model saved to class2-3_3classes


Checking leaky_relu


In [None]:
import numpy as np
from tensorflow.keras.datasets import mnist

# Load .npz file
data = np.load('./leaky_alpha_0.1.npz', allow_pickle=True)

# Inspect contents
print("Keys:", data.files)
ws = data['ws']  # list of weight matrices
bs = data['bs']  # list of bias vectors

# Let's print their shapes
for i, (w, b) in enumerate(zip(ws, bs)):
    print(f"Layer {i}: W.shape = {w.shape}, b.shape = {b.shape}")

# Transpose weight matrices
W1 = ws[0].T  # (784, 2)
W2 = ws[1].T  # (2, 1)

# Flatten biases
b1 = bs[0].reshape(-1)  # (2,)
b2 = bs[1].reshape(-1)  # (1,)


Keys: ['ws', 'bs']
Layer 0: W.shape = (2, 784), b.shape = (2, 1)
Layer 1: W.shape = (1, 2), b.shape = (1, 1)


In [None]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def predict(X):
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)
    return a2


In [None]:
(_, _), (x_test, y_test) = mnist.load_data()
x_test = x_test.reshape(-1, 784).astype(np.float32) / 255.0

# Keep only 0 and 1
mask = (y_test == 0) | (y_test == 1)
x_test = x_test[mask]
y_test = y_test[mask].reshape(-1, 1)


In [None]:
y_pred = predict(x_test)
y_pred_labels = (y_pred >= 0.5).astype(int)

accuracy = np.mean(y_pred_labels == y_test)
print(f"Accuracy on MNIST (0 vs 1): {accuracy * 100:.2f}%")


Accuracy on MNIST (0 vs 1): 53.66%


  return 1 / (1 + np.exp(-x))


In [None]:
import numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
from google.colab import files

# # === Upload the .npz file ===
# uploaded = files.upload()

# === Load weights and biases from .npz ===
data = np.load("leaky_alpha_0.1.npz", allow_pickle=True)
W1 = data['ws'][0]               # shape: (2, 784)
W2 = data['ws'][1]               # shape: (1, 2)
b1 = data['bs'][0].reshape(-1)   # shape: (2,)
b2 = data['bs'][1].reshape(-1)   # shape: (1,)

# === Define activation functions ===
def leaky_relu(x, alpha=0.1):
    return np.where(x > 0, x, alpha * x)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# === Forward pass / prediction ===
def predict(X):
    z1 = np.dot(X, W1.T) + b1          # Hidden layer: (N, 2)
    a1 = leaky_relu(z1, alpha=0.01)
    z2 = np.dot(a1, W2.T) + b2         # Output layer: (N, 1)
    a2 = sigmoid(z2)                  # Convert logits to probability
    return a2

# === Data preprocessing ===
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),  # Normalize using MNIST mean and std
    transforms.Lambda(lambda x: x / 100),        # Divide by 100 (as per training)
    transforms.Lambda(lambda x: x.view(-1).double())  # Flatten and convert to float64
])

# Load MNIST test data (only digits 0 and 1)
test_data = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_idx = [i for i, (_, y) in enumerate(test_data) if y in [0, 1]]
filtered_test = Subset(test_data, test_idx)

# Prepare test dataset arrays
x_test, y_test = [], []
for i in range(len(filtered_test)):
    x, y = filtered_test[i]
    x_test.append(x.numpy())
    y_test.append(y)

x_test = np.array(x_test)     # shape: (N, 784)
y_test = np.array(y_test)     # shape: (N,)

# === Predict and evaluate accuracy ===
y_pred = predict(x_test)
y_pred_labels = (y_pred >= 0.5).astype(int).reshape(-1)
accuracy = np.mean(y_pred_labels == y_test)

print(f"✅ Accuracy on MNIST (digits 0 vs 1): {accuracy * 100:.2f}%")


✅ Accuracy on MNIST (digits 0 vs 1): 99.67%


# 2 Layer Deep Networks

In [92]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Subset
import torch
import torch.nn as nn
import torch.optim as optim

# === Config ===
hidden_sizes = [2, 2]
epochs = 10
batch_size = 128

# === Data ===
transform = transforms.Compose([
    transforms.CenterCrop(6),  # From 28x28 to 6x6
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
    transforms.Lambda(lambda x: x / 100),  # Divide by 100 after normalization
    transforms.Lambda(lambda x: x.view(-1)[:32].double())  # Flatten and take first 32 pixels
])

train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_data  = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Filter only digits 0 and 1
train_idx = [i for i, (_, y) in enumerate(train_data) if y in [0, 1]]
test_idx  = [i for i, (_, y) in enumerate(test_data)  if y in [0, 1]]

train_loader = DataLoader(Subset(train_data, train_idx), batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(Subset(test_data,  test_idx),  batch_size=batch_size)

# === Model ===
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.fc1 = nn.Linear(32, hidden_sizes[0], dtype=torch.float64)
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1], dtype=torch.float64)
        self.fc3 = nn.Linear(hidden_sizes[1], 1, dtype=torch.float64)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = MyModel().double()

# === Training ===
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(epochs):
    model.train()
    for xb, yb in train_loader:
        xb = xb.double()
        yb = yb.double().unsqueeze(1)
        out = model(xb)
        loss = criterion(out, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1} - Loss: {loss.item():.4f}")

# === Evaluation ===
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.double()
        out = model(xb)
        pred = (out > 0).int().squeeze()
        correct += (pred == yb.int()).sum().item()
        total += yb.size(0)

print(f"Hard-label accuracy: {100 * correct / total:.2f}%")

# === Save weights & biases ===
weights = [
    model.fc1.weight.detach().cpu().numpy(),
    model.fc2.weight.detach().cpu().numpy(),
    model.fc3.weight.detach().cpu().numpy()
]
biases = [
    model.fc1.bias.detach().cpu().numpy().reshape(-1, 1),
    model.fc2.bias.detach().cpu().numpy().reshape(-1, 1),
    model.fc3.bias.detach().cpu().numpy().reshape(-1, 1)
]


Epoch 1 - Loss: 0.6779
Epoch 2 - Loss: 0.6719
Epoch 3 - Loss: 0.6459
Epoch 4 - Loss: 0.6316
Epoch 5 - Loss: 0.5659
Epoch 6 - Loss: 0.4943
Epoch 7 - Loss: 0.4257
Epoch 8 - Loss: 0.3894
Epoch 9 - Loss: 0.3259
Epoch 10 - Loss: 0.2659
Hard-label accuracy: 99.20%


In [93]:
# Save as .npz with arr_0 (weights), arr_1 (biases)
save_path = "./relu_mnist_hidden_32_2_2_1.npz"
np.savez(save_path,
         arr_0=np.array(weights, dtype=object),
         arr_1=np.array(biases, dtype=object))

print(f"Model saved to: {os.path.abspath(save_path)}")

Model saved to: /content/relu_mnist_hidden_32_2_2_1.npz
