In [None]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [None]:
class LinearNN(nn.Module):
    """Single fully connected layer mapping 2 input features to 3 output scores.

    The weight is re-initialised with Kaiming-uniform (ReLU gain) and the
    bias is zeroed as part of construction.
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(2, 3)
        self._initialize_weight()

    def _initialize_weight(self):
        """Overwrite the layer's default parameters in place."""
        nn.init.kaiming_uniform_(self.fc.weight, nonlinearity='relu')
        bias = self.fc.bias
        if bias is not None:
            nn.init.zeros_(bias)

    def forward(self, x):
        """Return the raw (un-normalised) output of the linear layer for ``x``."""
        return self.fc(x)
    
class NonlinearNN(nn.Module):
    """One-hidden-layer network: Linear(2, 5) -> ReLU -> Linear(5, 3).

    Both linear layers are initialised with Kaiming-uniform weights
    (ReLU gain) and zero biases when the model is constructed.
    """

    def __init__(self):
        super(NonlinearNN, self).__init__()
        self.hidden = nn.Linear(2, 5)
        self.relu = nn.ReLU()
        self.output = nn.Linear(5, 3)
        # Apply the custom initialisation; without this call the layers would
        # silently keep nn.Linear's default parameters.
        self._initialize_weight()

    def _initialize_weight(self):
        """Kaiming-uniform initialise both weight matrices and zero both biases."""
        # Hidden layer first, then output layer, so the RNG draw order is fixed.
        for layer in (self.hidden, self.output):
            torch.nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')
        for layer in (self.hidden, self.output):
            if layer.bias is not None:
                torch.nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return the raw output scores for ``x`` (no softmax is applied)."""
        return self.output(self.relu(self.hidden(x)))


def load_dataset(file_path):
    """Load a whitespace-delimited numeric dataset.

    Column 0 holds the 1-based class label; every remaining column is a
    feature.

    Args:
        file_path: Anything ``np.loadtxt`` accepts (path or file-like object).

    Returns:
        (X, y): ``X`` is a 2-D float array of features, ``y`` is a 1-D float
        array of zero-based labels (the original label minus 1).
    """
    # ndmin=2 keeps a single-row file two-dimensional so the column slicing
    # below works instead of raising IndexError on a 1-D array.
    data = np.loadtxt(file_path, ndmin=2)
    X = data[:, 1:]
    y = data[:, 0] - 1  # Convert to zero-indexed
    return X, y


def train(model, X_train, y_train, optimizer, criterion=nn.CrossEntropyLoss(), epochs=1000):
    """Run full-batch training and record the loss of every epoch.

    Each epoch performs exactly one forward/backward pass over the whole of
    ``X_train`` followed by a single optimizer step.

    Args:
        model: Module to optimise; put into training mode each epoch.
        X_train: Input batch fed to ``model``.
        y_train: Targets passed to ``criterion`` alongside the model output.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable. The default instance is created once, when
            the function is defined.
        epochs: Number of optimisation steps to take.

    Returns:
        List of ``epochs`` Python floats, one loss value per epoch.
    """
    def run_epoch():
        # One complete optimisation step; returns the scalar loss.
        model.train()
        optimizer.zero_grad()
        batch_loss = criterion(model(X_train), y_train)
        batch_loss.backward()
        optimizer.step()
        return batch_loss.item()

    return [run_epoch() for _ in range(epochs)]
        
        
def evaluate(model, X, y):
    """Return the fraction of rows in ``X`` whose predicted class equals ``y``.

    The model is switched to eval mode and gradients are disabled. The
    predicted class is the index of the largest score along dimension 1.

    Returns:
        Accuracy as a Python float.
    """
    model.eval()
    with torch.no_grad():
        predicted = torch.max(model(X), 1).indices
        hits = (predicted == y).float()
        return hits.mean().item()
    
        
def plot_decision_boundary(model, X, y, title, step=0.01):
    """Plot the model's predicted-class regions with the data points on top.

    A dense grid is laid over the bounding box of the first two columns of
    ``X`` (padded by 1 on every side), the model classifies every grid
    point, and the regions are drawn as filled contours.

    Args:
        model: Module accepting an ``(N, 2)`` float32 tensor and returning
            per-class scores along dimension 1.
        X: Array indexable as ``X[:, 0]`` / ``X[:, 1]`` (e.g. a NumPy array).
        y: Per-point values used to colour the scatter plot.
        title: Figure title.
        step: Grid spacing along both axes; the default matches the
            previously hard-coded resolution.
    """
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
    grid = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)
    model.eval()
    with torch.no_grad():
        # Class index per grid point, reshaped to the mesh and handed to
        # matplotlib as a plain NumPy array.
        Z = model(grid).argmax(dim=1).reshape(xx.shape).cpu().numpy()
    # Pass the colormap by name: plt.cm.get_cmap was removed in Matplotlib 3.9.
    plt.contourf(xx, yy, Z, alpha=0.5, cmap="Spectral")
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', marker='o', cmap="Spectral")
    plt.title(title)
    plt.show()

In [None]:
# Load the train and test splits from disk
X_train, y_train = load_dataset('iris-train.txt')
X_test, y_test = load_dataset('iris-test.txt')

# Standardise the features with statistics fitted on the training split only,
# then convert features to float32 tensors and labels to int64 tensors.
scaler = StandardScaler().fit(X_train)
X_train, X_test = (
    torch.tensor(scaler.transform(split), dtype=torch.float32)
    for split in (X_train, X_test)
)
y_train, y_test = (
    torch.tensor(labels, dtype=torch.long)
    for labels in (y_train, y_test)
)

In [None]:
# Seed the RNG so weight initialisation (and therefore the loss curves and
# accuracies below) is reproducible across Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Initialize models
linear_model = LinearNN()
nonlinear_model = NonlinearNN()
criterion = nn.CrossEntropyLoss()
linear_optimizer = optim.AdamW(linear_model.parameters(), lr=0.01)
nonlinear_optimizer = optim.AdamW(nonlinear_model.parameters(), lr=0.01)

# Train models (pass the criterion explicitly rather than relying on train()'s default)
linear_losses = train(linear_model, X_train, y_train, linear_optimizer, criterion=criterion)
nonlinear_losses = train(nonlinear_model, X_train, y_train, nonlinear_optimizer, criterion=criterion)

In [None]:
# Plot curves
def _plot_loss_curve(ax, losses, label, color, title):
    """Draw one training-loss curve on ``ax`` with axis labels, title and legend."""
    ax.plot(losses, label=label, color=color)
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.set_title(title)
    ax.legend()


# Side-by-side panels: linear model on the left, nonlinear model on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
_plot_loss_curve(axes[0], linear_losses, 'Linear NN', 'blue', 'Linear NN Training Loss_kaiming')
_plot_loss_curve(axes[1], nonlinear_losses, 'Nonlinear NN', 'green', 'Nonlinear NN Training Loss_kaiming')

plt.tight_layout()
plt.show()

In [None]:
# Evaluate accuracy of each model on the train split, then the test split
linear_train_acc, linear_test_acc = (
    evaluate(linear_model, features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)
nonlinear_train_acc, nonlinear_test_acc = (
    evaluate(nonlinear_model, features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

# Emit the results as a LaTeX tabular, one line per print argument
print(
    f"\\begin{{tabular}}{{c|c|c}}",
    f"Model & Train Accuracy & Test Accuracy \\\\ \\hline",
    f"Linear NN & {linear_train_acc:.2f} & {linear_test_acc:.2f} \\\\",
    f"Nonlinear NN & {nonlinear_train_acc:.2f} & {nonlinear_test_acc:.2f} \\\\",
    f"\\end{{tabular}}",
    sep="\n",
)

In [None]:
# Plot decision boundary
# NOTE(review): the linear model is drawn over the *training* points while the
# nonlinear model is drawn over the *test* points -- confirm this is intended.
plot_decision_boundary(
    model=linear_model, X=X_train.numpy(), y=y_train.numpy(), title='Linear NN'
)
plot_decision_boundary(
    model=nonlinear_model, X=X_test.numpy(), y=y_test.numpy(), title='Nonlinear NN'
)