Import necessary modules

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import random

In [2]:
def set_seed(seed=42):
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed()

Load dataset

In [3]:
# Load Digits dataset (1797 samples, 64 features)
digits = load_digits()
X = digits.data
y = digits.target

Standardize the dataset

In [4]:
scaler = StandardScaler()
X = scaler.fit_transform(X)


Train-Test split

In [5]:

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


Convert to tensors

In [6]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

Define the neural network

In [7]:
class DigitNN(nn.Module):
  def __init__(self):
      super(DigitNN, self).__init__()
      self.fc1 = nn.Linear(64, 128)
      self.fc2 = nn.Linear(128, 64)
      self.fc3 = nn.Linear(64, 10)

  def forward(self, x):
      x = F.relu(self.fc1(x))
      x = F.relu(self.fc2(x))
      x = self.fc3(x)
      return x

Train and Evaluate the model

In [8]:
def train_and_evaluate(model):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

    model.train()
    for epoch in range(100):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        predictions = model(X_test).argmax(dim=1).numpy()
        accuracy = accuracy_score(y_test, predictions)

    return accuracy

Zero weight Initialization

In [9]:
model_zero = DigitNN()

# Zero initialization
for param in model_zero.parameters():
    nn.init.constant_(param, 0.0)

acc_zero = train_and_evaluate(model_zero)
print(f"Accuracy with Zero Initialization: {acc_zero * 100:.2f}%")


Accuracy with Zero Initialization: 7.78%


High-Value Random Initialization

In [10]:
model_high = DigitNN()
set_seed()
# High-value random initialization (mean=5, std=3)
for param in model_high.parameters():
    nn.init.normal_(param, mean=5.0, std=3.0)

acc_high = train_and_evaluate(model_high)
print(f"Accuracy with High-Value Initialization: {acc_high * 100:.2f}%")

Accuracy with High-Value Initialization: 41.11%


Low-Value Random Initialization

In [11]:
model_low = DigitNN()
set_seed()
# Low-value random initialization (mean=0.2, std=0.05)
for param in model_low.parameters():
    nn.init.normal_(param, mean=0.2, std=0.05)

acc_low = train_and_evaluate(model_low)
print(f"Accuracy with Low-Value Initialization: {acc_low * 100:.2f}%")

Accuracy with Low-Value Initialization: 91.94%


He Initialization (Kaiming)

In [12]:
model_he = DigitNN()
set_seed()
# He initialization
for name, param in model_he.named_parameters():
    if 'weight' in name:
        nn.init.kaiming_normal_(param, nonlinearity='relu')
    elif 'bias' in name:
        nn.init.constant_(param, 0.0)

acc_he = train_and_evaluate(model_he)
print(f"Accuracy with He Initialization: {acc_he * 100:.2f}%")

Accuracy with He Initialization: 97.22%


Xavier Initialization

In [13]:
model_xavier = DigitNN()
set_seed()
# Xavier initialization
for name, param in model_xavier.named_parameters():
    if 'weight' in name:
        nn.init.xavier_normal_(param)
    elif 'bias' in name:
        nn.init.constant_(param, 0.0)

acc_xavier = train_and_evaluate(model_xavier)
print(f"Accuracy with Xavier Initialization: {acc_xavier * 100:.2f}%")

Accuracy with Xavier Initialization: 97.78%
