In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


In [7]:
class CustomLinear(nn.Module):
    """Fully connected layer computing ``x @ weight + bias``.

    ``weight`` has shape ``(input_size, output_size)`` and is drawn from a
    standard normal distribution; ``bias`` has shape ``(output_size,)`` and
    starts at zero. Both are registered as trainable parameters.
    """

    def __init__(self, input_size, output_size):
        # nn.Module has to be set up before any parameter can be registered
        super().__init__()

        # Build the initial values first, then wrap them in nn.Parameter so
        # they are reported by .parameters() and receive gradients
        initial_weight = torch.randn(input_size, output_size)
        initial_bias = torch.zeros(output_size)
        self.weight = nn.Parameter(initial_weight)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Apply the affine map to ``x``; its last dimension must match ``input_size``."""
        projected = x @ self.weight
        return projected + self.bias

class CustomNet(nn.Module):
    """Multi-layer perceptron assembled from ``CustomLinear`` layers.

    Args:
        width: Sequence of layer sizes ``[in, hidden_1, ..., out]``. Each
            consecutive pair gives the input/output size of one layer, so at
            least two entries are needed.

    Raises:
        ValueError: If ``width`` has fewer than two entries (no layer could be
            built and ``forward`` would have nothing to apply).
    """

    def __init__(self, width):

        # Self initialize is required for using nn.Module
        super().__init__()

        if len(width) < 2:
            raise ValueError(
                f"width must contain at least an input and an output size, got {width!r}"
            )

        # One layer per consecutive (input_size, output_size) pair; ModuleList
        # registers the layers so their parameters are visible to .parameters()
        self.list_layers = nn.ModuleList(
            [
                CustomLinear(input_size, output_size)
                for input_size, output_size in zip(width[:-1], width[1:])
            ]
        )

    def forward(self, x):
        """Run ``x`` through every layer and return the raw last-layer output.

        ReLU follows every layer except the last. No softmax is applied, so
        the result is unnormalised scores (logits).
        """
        # Layers are invoked by calling them (not via .forward) so that
        # nn.Module hooks are honoured
        for layer in self.list_layers[:-1]:
            x = F.relu(layer(x))

        # Only execute the linear map in the last layer
        return self.list_layers[-1](x)


In [None]:
# Smoke test: push random data through an untrained network
torch.manual_seed(0)
X = torch.rand(1000, 3)  # 1000 samples, 3 features each
y = torch.randint(0, 2, (1000,))  # Binary classification (0 or 1)

# Create DataLoader for batching
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

model = CustomNet([3, 6, 2])

# Every batch is run through the model, but only the output of the final batch
# survives the loop. 1000 is not a multiple of 32, so that final batch holds
# the 8 leftover samples.
for batch_X, batch_y in dataloader:
    # Forward pass; call the module (not .forward) so nn.Module hooks run
    predictions = model(batch_X)

predictions

tensor([[ 0.0707,  0.2669],
        [ 0.5908, -0.8883],
        [ 0.1134,  0.5694],
        [ 0.4948, -0.4567],
        [-0.1629,  0.2921],
        [ 0.4560, -0.7879],
        [ 0.6345, -1.2023],
        [ 0.5387, -0.2367]], grad_fn=<AddBackward0>)

In [9]:
def test_acc():
    return torch.mean((torch.argmax(model(dataset['test_input']), dim=1) == dataset['test_label']).type(dtype))

def cross_entropy_loss(predictions, targets):
    """Mean negative log-likelihood of the target classes.

    ``predictions`` holds raw scores with classes along dim 1, and ``targets``
    supplies one class index per row of ``predictions``.
    """
    # Log-softmax converts the raw scores into log-probabilities
    log_probs = F.log_softmax(predictions, dim=1)

    # For each row, pick the log-probability assigned to its target class
    row_index = torch.arange(len(targets))
    target_log_probs = log_probs[row_index, targets]

    return -target_log_probs.mean()

In [36]:
# Hyperparameters
input_size = 3
hidden_size = 5
output_size = 2
learning_rate = 0.01
num_epochs = 100
num_samples = 1000
batch_size = 32

# Generate synthetic data for demonstration.
# The labels are drawn independently of the features, so there is nothing to
# learn: expect the two-class loss to hover around ln(2) ~= 0.693.
torch.manual_seed(0)
X = torch.rand(num_samples, input_size)  # num_samples rows, input_size features each
y = torch.randint(0, output_size, (num_samples,))  # Class index in [0, output_size)

# Create DataLoader for batching
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize the model
model = CustomNet([input_size, hidden_size, output_size])

# Define loss function
def loss_fn(predictions, targets, type='classification'):
    """Compute the mean loss for the requested task type.

    Args:
        predictions: Model outputs; raw class scores for classification.
        targets: Ground truth; class indices for classification, values of the
            same shape as ``predictions`` for regression.
        type: ``'classification'`` for cross-entropy (default) or
            ``'regression'`` for mean squared error.

    Returns:
        Scalar loss tensor, averaged over the batch.

    Raises:
        ValueError: If ``type`` is not a supported task type.
    """
    # Cross-entropy if classification
    if type == 'classification':
        return F.cross_entropy(predictions, targets)

    # Mean squared error if regression
    if type == 'regression':
        return F.mse_loss(predictions, targets)

    # Fail loudly instead of hitting an unbound local further down
    raise ValueError(
        f"Unknown loss type {type!r}; expected 'classification' or 'regression'"
    )

# Training loop: mini-batch gradient descent with hand-written parameter updates
for epoch in range(num_epochs):

    total_loss = 0

    for batch_X, batch_y in dataloader:
        # Forward pass; call the module (not .forward) so nn.Module hooks run
        predictions = model(batch_X)

        # Compute loss
        loss = loss_fn(predictions, batch_y)

        # Backward pass
        loss.backward()

        # Update parameters manually
        with torch.no_grad():  # Disable gradient tracking during updates
            for param in model.parameters():
                # A parameter that took no part in the forward pass has no gradient
                if param.grad is None:
                    continue
                param -= learning_rate * param.grad  # Gradient descent
                param.grad.zero_()  # Reset so the next backward() does not accumulate

        total_loss += loss.item()

    # Report the mean per-batch loss on every 20th epoch
    if (epoch + 1) % 20 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(dataloader):.4f}")

# Evaluation mode: eval() has to be *called*. Assigning to model.eval would
# replace the method with a bool and leave the module in training mode.
model.eval()
with torch.no_grad():
    # Accuracy is measured on the same samples the model was trained on
    predictions = model(X)
    _, predicted_classes = torch.max(predictions, 1)
    accuracy = (predicted_classes == y).float().mean()
    print(f"Accuracy: {accuracy:.4f}")

Epoch [20/100], Loss: 0.6945
Epoch [40/100], Loss: 0.6940
Epoch [60/100], Loss: 0.6928
Epoch [80/100], Loss: 0.6915
Epoch [100/100], Loss: 0.6913
Accuracy: 0.5300


In [38]:
# Show every trainable parameter tensor of the model
[param for param in model.parameters()]

[Parameter containing:
 tensor([[ 0.3952, -0.0571, -0.4534,  0.0934,  0.3414],
         [ 0.3043, -0.0950,  0.7846, -0.3401, -0.1687],
         [ 0.1813,  0.3657, -0.9579, -0.0737, -0.2886]], requires_grad=True),
 Parameter containing:
 tensor([-0.0771,  0.0107, -0.4503, -0.0901, -0.2612], requires_grad=True),
 Parameter containing:
 tensor([[-0.3313,  0.5072],
         [-0.6385, -1.1488],
         [-0.2219,  2.1029],
         [ 2.2959, -0.2719],
         [ 1.4996, -0.5152]], requires_grad=True),
 Parameter containing:
 tensor([ 0.1288, -0.1288], requires_grad=True)]