In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F

# Deep Learning Utils

In [2]:
class ConfigurableMLP(nn.Module):
    def __init__(self, config):
        super(ConfigurableMLP, self).__init__()
        layers = []
        input_size = config['input_size']
        
        for layer_idx, hidden_size in enumerate(config['hidden_sizes']):
            layers.append(nn.Linear(input_size, hidden_size))
            activation = config['activations'][layer_idx]
            if activation == 'relu':
                layers.append(nn.ReLU())
            elif activation == 'tanh':
                layers.append(nn.Tanh())
            elif activation == 'sigmoid':
                layers.append(nn.Sigmoid())
            elif activation == 'none':  # No activation
                pass
            else:
                raise ValueError(f"Unsupported activation: {activation}")

            input_size = hidden_size
        
        # Final output layer
        layers.append(nn.Linear(input_size, config['output_size']))
        final_activation = config.get('final_activation', 'none')
        if final_activation == 'softmax':
            layers.append(nn.Softmax(dim=1))
        elif final_activation == 'none':
            pass
        else:
            raise ValueError(f"Unsupported activation: {final_activation}")
        
        self.network = nn.Sequential(*layers)

        print(self)


    def forward(self, x):
        return self.network(x)


In [3]:
class ConfigurableCNN(nn.Module):
    def __init__(self, config):
        super(ConfigurableCNN, self).__init__()
        layers = []
        input_channels = config['input_channels']
        
        # CNN layers
        for _, layer_config in enumerate(config['conv_layers']):
            layers.append(nn.Conv2d(
                input_channels,
                layer_config['out_channels'],
                kernel_size=layer_config['kernel_size'],
                stride=layer_config.get('stride', 1),
                padding=layer_config.get('padding', 0)
            ))
            activation = layer_config['activation']
            if activation == 'relu':
                layers.append(nn.ReLU())
            elif activation == 'tanh':
                layers.append(nn.Tanh())
            elif activation == 'sigmoid':
                layers.append(nn.Sigmoid())
            elif activation == 'none':
                pass
            else:
                raise ValueError(f"Unsupported activation: {activation}")
            
            input_channels = layer_config['out_channels']
        
        self.conv = nn.Sequential(*layers)
        
        # Fully connected layer
        self.fc = nn.Sequential(
            nn.Linear(config['flattened_size'], config['output_size'])
        )
        final_activation = config.get('final_activation', 'none')
        if final_activation == 'softmax':
            self.fc.add_module("final_softmax", nn.Softmax(dim=1))
        elif final_activation == 'none':
            pass
        else:
            raise ValueError(f"Unsupported activation: {final_activation}")

        print(self)


    def forward(self, x):
        x = self.conv(x)
        x = torch.flatten(x, 1)
        return self.fc(x)

# Training Examples

**Note on Loss Calculation**
 - regression tasks -> e.g. MSELoss (Mean Squared Error)
 - classification tasks -> e.g. CrossEntropyLoss (Cross Entropy)

In [4]:
# Training Loop
def train_model(model, loader, criterion, optimizer, epochs=5):
    model.train()   # make sure that weights are not frozen

    for epoch in range(epochs):
        running_loss = 0.0
        
        # Loop over each batch
        for inputs, targets in loader:
            optimizer.zero_grad()                   # clear all gradients before backpropagation
            outputs = model(inputs)                 # forward pass
            loss = criterion(outputs, targets)      # calculate the loss (error) using a specified loss function
            loss.backward()                         # compute the gradients of the loss with respect to all the model's parameters using backpropagation
            optimizer.step()                        # update the models weights and biases
 
            running_loss += loss.item()

        print(f"Epoch {epoch+1}, Loss: {running_loss/len(loader):.4f}")


# Create dummy data for regression
def create_dummy_data(input_shape, num_samples, output_size):
    x = torch.rand(num_samples, *input_shape)
    y = torch.randint(0, output_size, (num_samples,)) if output_size > 1 else torch.rand(num_samples, 1)
    return x, y

## MLP Training Example

In [5]:
##
## Regression Example
##

# Generate Dummy Data 
mlp_regression_X, mlp_regression_y = create_dummy_data((20,), 100, 1)  # 20 input features, 1 continuous output
mlp_regression_dataset = TensorDataset(mlp_regression_X, mlp_regression_y)
mlp_regression_loader = DataLoader(mlp_regression_dataset, batch_size=16, shuffle=True)

# Setup Training
mlp_regression_config = {
    'input_size': 20,                   # Number of input features
    'hidden_sizes': [64, 32],           # Two hidden layers with 64 and 32 neurons
    'activations': ['relu', 'relu'],    # Activation for each hidden layer
    'output_size': 1,                   # Regression task: Single continuous output
    'final_activation': 'none'          # No activation for the final layer
}

mlp_regression_model = ConfigurableMLP(mlp_regression_config)
mlp_regression_criterion = nn.MSELoss()  # Mean Squared Error for regression
mlp_regression_optimizer = optim.Adam(mlp_regression_model.parameters(), lr=0.001)

# Train the MLP
print("Training MLP for Regression...")
train_model(mlp_regression_model, mlp_regression_loader, mlp_regression_criterion, mlp_regression_optimizer)

ConfigurableMLP(
  (network): Sequential(
    (0): Linear(in_features=20, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)
Training MLP for Regression...
Epoch 1, Loss: 0.4986
Epoch 2, Loss: 0.4207
Epoch 3, Loss: 0.2991
Epoch 4, Loss: 0.1791
Epoch 5, Loss: 0.1034


In [6]:
##
## Classification Example
##

# Generate Dummy Data 
mlp_x, mlp_y = create_dummy_data((20,), 100, 3)  # 20 input features, 3 output classes
print(mlp_x.shape)
mlp_dataset = TensorDataset(mlp_x, mlp_y)
mlp_loader = DataLoader(mlp_dataset, batch_size=16, shuffle=True)

# Setup Training
mlp_config = {
    'input_size': 20,
    'hidden_sizes': [64, 32],           # Two hidden layers
    'activations': ['relu', 'relu'],    # Activation for each hidden layer
    'output_size': 3,                   # For classification, 3 classes
    'final_activation': 'softmax'       # Final activation is softmax
}
mlp_model = ConfigurableMLP(mlp_config)
mlp_criterion = nn.CrossEntropyLoss()  # Suitable for classification
mlp_optimizer = optim.Adam(mlp_model.parameters(), lr=0.001)

# Train the MLP
print("Training MLP for Classification...")
train_model(mlp_model, mlp_loader, mlp_criterion, mlp_optimizer)

torch.Size([100, 20])
ConfigurableMLP(
  (network): Sequential(
    (0): Linear(in_features=20, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=3, bias=True)
    (5): Softmax(dim=1)
  )
)
Training MLP for Classification...
Epoch 1, Loss: 1.0990
Epoch 2, Loss: 1.0989
Epoch 3, Loss: 1.0963
Epoch 4, Loss: 1.0960
Epoch 5, Loss: 1.0950


In [7]:
##
## Regression Example
##

# Generate dummy regression data
cnn_regression_X, cnn_regression_y = create_dummy_data((3, 32, 32), 100, 1)     # 3-channel 32x32 images, 1 continuous output
cnn_regression_dataset = TensorDataset(cnn_regression_X, cnn_regression_y)
cnn_regression_loader = DataLoader(cnn_regression_dataset, batch_size=16, shuffle=True)

cnn_regression_config = {
    'input_channels': 3,                # Number of input channels (e.g., RGB images)
    'conv_layers': [
        {'out_channels': 16, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'tanh'}
    ],
    'flattened_size': 32 * 32 * 32,     # Input size flattened after convolution (32x32 input size)
    'output_size': 1,                   # Regression task: Single continuous output
    'final_activation': 'none'          # No activation for the final layer
}

cnn_regression_model = ConfigurableCNN(cnn_regression_config)
cnn_regression_criterion = nn.MSELoss()  # Mean Squared Error for regression
cnn_regression_optimizer = optim.Adam(cnn_regression_model.parameters(), lr=0.001)

# Train the CNN
print("\nTraining CNN for Regression...")
train_model(cnn_regression_model, cnn_regression_loader, cnn_regression_criterion, cnn_regression_optimizer)


ConfigurableCNN(
  (conv): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): Tanh()
  )
  (fc): Sequential(
    (0): Linear(in_features=32768, out_features=1, bias=True)
  )
)

Training CNN for Regression...
Epoch 1, Loss: 0.6076
Epoch 2, Loss: 0.2010
Epoch 3, Loss: 0.1165
Epoch 4, Loss: 0.0643
Epoch 5, Loss: 0.0330


In [8]:
##
## Classification Example
##

# Create Dummy Data
cnn_x, cnn_y = create_dummy_data((3, 32, 32), 100, 3)               # 3 channels, 32x32 images, 3 output classes
cnn_dataset = TensorDataset(cnn_x, cnn_y)
cnn_loader = DataLoader(cnn_dataset, batch_size=16, shuffle=True)

# Setup Training
cnn_config = {
    'input_channels': 3,
    'conv_layers': [
        {'out_channels': 16, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
        {'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'tanh'}
    ],
    'flattened_size': 32 * 32 * 32,     # Input size (32x32) flattened after final conv layer
    'output_size': 3,                   # For classification, 3 classes
    'final_activation': 'softmax'       # Final activation is softmax
}
cnn_model = ConfigurableCNN(cnn_config)
cnn_criterion = nn.CrossEntropyLoss()  # Suitable for classification
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

print("\nTraining CNN for Classification...")
train_model(cnn_model, cnn_loader, cnn_criterion, cnn_optimizer)

ConfigurableCNN(
  (conv): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): Tanh()
  )
  (fc): Sequential(
    (0): Linear(in_features=32768, out_features=3, bias=True)
    (final_softmax): Softmax(dim=1)
  )
)

Training CNN for Classification...
Epoch 1, Loss: 1.1947
Epoch 2, Loss: 1.0755
Epoch 3, Loss: 1.0473
Epoch 4, Loss: 0.9272
Epoch 5, Loss: 0.9047
