<a href="https://colab.research.google.com/github/Shahi77/DeepLearning_Assignments/blob/main/Assignment04.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Implement the dropout function for a single layer. Draw samples from the uniform distribution S[0,1] . Keep those nodes for which the corresponding sample is greater than p probability , dropping the rest. Implement a dropout_layer function that drops out the elements in the tensor input X with probability dropout, rescaling the remainder. Test the dropout layer with few examples

In [None]:
import torch
import torch.nn.functional as F

def dropout_layer(X: torch.Tensor, dropout: float) -> torch.Tensor:
    """
    Applies dropout to a given input tensor X.

    Parameters:
    X (torch.Tensor): Input tensor.
    dropout (float): Dropout probability (0 <= dropout < 1). Higher means more units dropped.

    Returns:
    torch.Tensor: Tensor after applying dropout (scaled appropriately).
    """
    assert 0 <= dropout < 1, "Dropout probability must be in the range [0, 1)."

    if dropout == 0:
        return X  # No dropout applied

    # Generate a mask with the same shape as X, using uniform distribution
    mask = (torch.rand_like(X) > dropout).float()

    # Scale the remaining elements
    output = (X * mask) / (1.0 - dropout)

    # Debugging info
    print("Original Input:")
    print(X)
    print("Generated Mask:")
    print(mask)
    print("Dropout Applied Output:")
    print(output)
    print(f"Percentage of dropped values: {100 * (1 - mask.mean().item()):.2f}%")
    print(f"Mean of input: {X.mean().item():.4f}, Mean of output: {output.mean().item():.4f}")
    print("-")

    return output

# Testing the dropout layer with a few examples
if __name__ == "__main__":
    torch.manual_seed(42)  # For reproducibility

    # Example input tensor
    X = torch.tensor([[1.0, 2.0, 3.0],
                      [4.0, 5.0, 6.0],
                      [7.0, 8.0, 9.0]])

    dropout_rates = [0.0, 0.3, 0.5, 0.8]

    for p in dropout_rates:
        print(f"Dropout Probability: {p}")
        dropout_layer(X, p)


Dropout Probability: 0.0
Dropout Probability: 0.3
Original Input:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])
Generated Mask:
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [0., 1., 1.]])
Dropout Applied Output:
tensor([[ 1.4286,  2.8571,  4.2857],
        [ 5.7143,  7.1429,  8.5714],
        [ 0.0000, 11.4286, 12.8571]])
Percentage of dropped values: 11.11%
Mean of input: 5.0000, Mean of output: 6.0317
-
Dropout Probability: 0.5
Original Input:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])
Generated Mask:
tensor([[0., 1., 1.],
        [1., 1., 1.],
        [0., 1., 1.]])
Dropout Applied Output:
tensor([[ 0.,  4.,  6.],
        [ 8., 10., 12.],
        [ 0., 16., 18.]])
Percentage of dropped values: 22.22%
Mean of input: 5.0000, Mean of output: 8.2222
-
Dropout Probability: 0.8
Original Input:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])
Generated Mask:
tensor([[0., 0., 0.],
        [0., 0., 1.],
        [0., 0., 0.

2. Implement Dropout layer in neural network model after every fully connected layer

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_prob):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.dropout1 = nn.Dropout(p=dropout_prob)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.dropout2 = nn.Dropout(p=dropout_prob)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Testing the neural network with dropout
if __name__ == "__main__":
    torch.manual_seed(42)  # For reproducibility

    # Example input tensor
    X = torch.randn(5, 10)  # Batch of 5 samples, 10 features each

    model = NeuralNetwork(input_size=10, hidden_size=20, output_size=5, dropout_prob=0.3)
    model.eval()  # Set model to evaluation mode (no dropout applied)
    print("Output without dropout (evaluation mode):")
    print(model(X))

    model.train()  # Set model to training mode (dropout applied)
    print("Output with dropout (training mode):")
    print(model(X))


Output without dropout (evaluation mode):
tensor([[ 0.0713, -0.0538,  0.0075, -0.3819, -0.2163],
        [-0.0048,  0.1738,  0.2743, -0.2746, -0.1560],
        [ 0.0465,  0.0078,  0.2278, -0.1960, -0.0924],
        [-0.0017,  0.0759,  0.2682, -0.2008, -0.1750],
        [ 0.0606,  0.0299,  0.2935, -0.2621, -0.1218]],
       grad_fn=<AddmmBackward0>)
Output with dropout (training mode):
tensor([[ 0.1122, -0.1865,  0.0692, -0.4563, -0.2607],
        [-0.0957,  0.4482,  0.2956, -0.2055, -0.1704],
        [ 0.0796,  0.1718,  0.2367, -0.2209, -0.1437],
        [-0.0208,  0.2398,  0.3445, -0.2174, -0.1471],
        [ 0.0470,  0.1374,  0.4373, -0.4032, -0.0424]],
       grad_fn=<AddmmBackward0>)


3. Visualization using wandb library for various experimental setups.

In [None]:
pip install torch torchvision torchtext wandb numpy

Collecting torchtext
  Downloading torchtext-0.18.0-cp311-cp311-manylinux1_x86_64.whl.metadata (7.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata

In [1]:
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mshahi77[0m ([33mshahi77-national-institute-of-technology-hamirpur[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:
sweep_config = {
    'method': 'random'
    }

metric = {
    'name': 'validation_accuracy',
    'goal': 'maximize'
    }

sweep_config['metric'] = metric

parameters_dict= {
        'epochs': {'values': [10]},
        'lr': {'values': [0.001, 0.01]},
        'momentum': {'values': [0.9, 0.99]},
        'optimizer': {'values': ['sgd']},
        'batch_size': {'values': [64]},
        'weight_init': {'values': ['random']},
        'dropout_prob': {'values': [0.2, 0.3, 0.5]},  # Dropout probability between 20% to 50%
        'dropout_method': {'values': ['random', 'dropconnect', 'dropblock', 'maxdropout', 'biased_dropout', 'flipover']},
        'model': {'values': ['create_standard_network_1', 'create_standard_network_2', 'create_dropout_network_logistic', 'create_dropout_network_relu']}
    }

sweep_config['parameters'] = parameters_dict

import pprint
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'validation_accuracy'},
 'parameters': {'batch_size': {'values': [64]},
                'dropout_method': {'values': ['random',
                                              'dropconnect',
                                              'dropblock',
                                              'maxdropout',
                                              'biased_dropout',
                                              'flipover']},
                'dropout_prob': {'values': [0.2, 0.3, 0.5]},
                'epochs': {'values': [10]},
                'lr': {'values': [0.001, 0.01]},
                'model': {'values': ['create_standard_network_1',
                                     'create_standard_network_2',
                                     'create_dropout_network_logistic',
                                     'create_dropout_network_relu']},
                'momentum': {'values': [0.9, 0.99]},
                'optimizer

In [3]:
sweep_id = wandb.sweep(sweep_config, project="Dropout04")

Create sweep with ID: qf1328ih
Sweep URL: https://wandb.ai/shahi77-national-institute-of-technology-hamirpur/Dropout04/sweeps/qf1328ih


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import wandb
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset building function
def build_dataset(batch_size):
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])  # Normalize for MNIST
    dataset = datasets.MNIST('.', train=True, download=True, transform=transform)
    # Split 10% for validation
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, val_loader


# Neural Network Architecture with Dropout
class NeuralNetworkWithDropout(nn.Module):
    def __init__(self, input_size, hidden_layers, output_size, dropout_prob, activation_fn, init_method, max_threshold=None):
        super(NeuralNetworkWithDropout, self).__init__()
        self.init_method = init_method
        self.max_threshold = max_threshold

        layers = []

        # Input to first hidden layer
        layers.append(nn.Linear(input_size, hidden_layers[0]))
        layers.append(activation_fn())
        layers.append(nn.Dropout(dropout_prob))

        # Hidden layers with dropout
        for i in range(1, len(hidden_layers)):
            layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i]))
            layers.append(activation_fn())
            layers.append(nn.Dropout(dropout_prob))

        # Output layer
        layers.append(nn.Linear(hidden_layers[-1], output_size))

        self.network = nn.Sequential(*layers)
        self.apply(self._initialize_weights)

    def forward(self, x):
      # Flatten the input tensor
      x = x.view(x.size(0), -1)  # Flatten the 28x28 images into a 784 vector
      return self.network(x)


    def _initialize_weights(self, layer):
        if isinstance(layer, nn.Linear):
            if self.init_method == 'random':
                nn.init.normal_(layer.weight, mean=0, std=0.01)
            elif self.init_method == 'max_threshold':
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                if self.max_threshold:
                    torch.clamp(layer.weight, max=self.max_threshold)
            elif self.init_method == 'pretraining':
                nn.init.normal_(layer.weight, mean=0, std=0.01)

            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0)


# Experiment Configurations

# 1. StandardNeuralNet Logistic 2 layers, 100 units
def create_standard_network_1():
    return NeuralNetworkWithDropout(input_size=784, hidden_layers=[100, 100], output_size=10,
                                    dropout_prob=0.5, activation_fn=nn.Sigmoid, init_method='random')

# 2. StandardNeuralNet Logistic 2 layers, 800 units
def create_standard_network_2():
    return NeuralNetworkWithDropout(input_size=784, hidden_layers=[800, 800], output_size=10,
                                    dropout_prob=0.5, activation_fn=nn.Sigmoid, init_method='random')

# 3. DropoutNN Logistic 3 layers, 1024 units
def create_dropout_network_logistic():
    return NeuralNetworkWithDropout(input_size=784, hidden_layers=[1024, 1024, 1024], output_size=10,
                                    dropout_prob=0.5, activation_fn=nn.Sigmoid, init_method='random')

# 4. DropoutNN ReLU 3 layers, 1024 units
def create_dropout_network_relu():
    return NeuralNetworkWithDropout(input_size=784, hidden_layers=[1024, 1024, 1024], output_size=10,
                                    dropout_prob=0.5, activation_fn=nn.ReLU, init_method='random')


# DROPOUTS

# Define custom DropConnect layer
class DropConnect(nn.Module):
    def __init__(self, layer, p=0.5):
        super(DropConnect, self).__init__()
        self.layer = layer
        self.p = p

    def forward(self, x):
        if self.training:
            # DropConnect: Randomly zero out weights, not activations
            mask = (torch.rand_like(self.layer.weight) > self.p).float()
            weight = self.layer.weight * mask
            return F.linear(x, weight, self.layer.bias)
        else:
            return self.layer(x)

# Define custom DropBlock layer
class DropBlock(nn.Module):
    def __init__(self, p=0.5):
        super(DropBlock, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training:
            # DropBlock: Randomly block entire blocks of activations
            block_size = int(x.size(1) * self.p)
            mask = torch.ones_like(x)
            mask[:, :block_size] = 0  # You can modify this logic to randomly block in more advanced ways
            x = x * mask
        return x

# Define Maxdropout (drop the largest activations)
class MaxDropout(nn.Module):
    def __init__(self, p=0.5):
        super(MaxDropout, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training:
            # Drop the max activations
            top_k = int(x.size(1) * self.p)
            _, indices = torch.topk(x, top_k, dim=1, largest=True, sorted=False)
            mask = torch.zeros_like(x)
            mask.scatter_(1, indices, 1)
            x = x * mask
        return x

# Define Biased Dropout
class BiasedDropout(nn.Module):
    def __init__(self, p=0.5, bias=0.2):
        super(BiasedDropout, self).__init__()
        self.p = p
        self.bias = bias

    def forward(self, x):
        if self.training:
            # Biased Dropout: Apply biased dropout, where some neurons are more likely to be dropped
            prob = torch.full_like(x, self.p + self.bias)
            mask = (torch.rand_like(x) > prob).float()
            x = x * mask
        return x

# Define Flipover Dropout
class FlipoverDropout(nn.Module):
    def __init__(self, p=0.5):
        super(FlipoverDropout, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training:
            # Flipover: Randomly negate the activations of dropped units
            mask = (torch.rand_like(x) > self.p).float()
            x = x * mask
            x = x - (x * mask)  # Negate the dropped values
        return x

# Main function to apply different dropout methods
def apply_dropout_method(model, method_name, dropout_prob=0.5):
    if method_name == "random":
        # Apply standard random dropout to each layer
        for module in model.children():
            if isinstance(module, nn.Linear):
                module.dropout = nn.Dropout(dropout_prob)
        return model

    if method_name == "dropconnect":
        # Apply DropConnect
        for module in model.children():
            if isinstance(module, nn.Linear):
                module = DropConnect(module, p=dropout_prob)
        return model

    if method_name == "dropblock":
        # Apply DropBlock
        for module in model.children():
            if isinstance(module, nn.Linear):
                module = DropBlock(p=dropout_prob)
        return model

    if method_name == "maxdropout":
        # Apply Maxdropout
        for module in model.children():
            if isinstance(module, nn.Linear):
                module = MaxDropout(p=dropout_prob)
        return model

    if method_name == "biased_dropout":
        # Apply Biased Dropout
        for module in model.children():
            if isinstance(module, nn.Linear):
                module = BiasedDropout(p=dropout_prob)
        return model

    if method_name == "flipover":
        # Apply Flipover Dropout
        for module in model.children():
            if isinstance(module, nn.Linear):
                module = FlipoverDropout(p=dropout_prob)
        return model

    # Default: no dropout
    return model


# Optimizer function
def get_optimizer(model, optimizer_name, lr, momentum=0, weight_decay=0):
    if optimizer_name == 'sgd':
        return optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    else:
        raise ValueError("Optimizer not supported")


# Training function
def train(model, train_loader, optimizer, criterion, epochs):
        config = wandb.config

        model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            correct = 0
            total = 0
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            wandb.log({
                "epoch": epoch + 1,
                "train_loss": running_loss / len(train_loader),
                "train_accuracy": 100 * correct / total,
                "trial_name": f"m_{config.model}_dr_{config.dropout_method}_p_{config.dropout_prob}lr_{config.lr}_m_{config.momentum}"  # Add trial name
            })

            print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}, Accuracy: {100 * correct / total}%")

# Evaluation function
def evaluate(model, val_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy


In [5]:
import wandb

def run_experiment():
    # Initialize a new wandb run
    with wandb.init() as run:
        # Access the sweep config from wandb
        config = wandb.config

        # Generate a custom trial name using hyperparameters from the config
        trial_name = f"m_{config.model}_dr_{config.dropout_method}_p_{config.dropout_prob}lr_{config.lr}_m_{config.momentum}"

        run.name = trial_name

        # Build dataset for training and validation
        train_loader, val_loader = build_dataset(config.batch_size)

        # Choose model based on config
        if config.model == 'create_standard_network_1':
            model = create_standard_network_1().to(device)
        elif config.model == 'create_standard_network_2':
            model = create_standard_network_2().to(device)
        elif config.model == 'create_dropout_network_logistic':
            model = create_dropout_network_logistic().to(device)
        elif config.model == 'create_dropout_network_relu':
            model = create_dropout_network_relu().to(device)
        else:
            raise ValueError(f"Unknown model: {config.model}")

        # Apply the selected dropout method
        model = apply_dropout_method(model, config.dropout_method, dropout_prob=config.dropout_prob)

        # Define loss and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = get_optimizer(model, config.optimizer, config.lr)

        # Train the model
        train(model, train_loader, optimizer, criterion, config.epochs)

        # Evaluate the model
        val_accuracy = evaluate(model, val_loader)
        wandb.log({"validation_accuracy": val_accuracy, "trial_name": trial_name})

In [6]:
# Run the sweep
wandb.agent(sweep_id, run_experiment,count=15)

[34m[1mwandb[0m: Agent Starting Run: v9tfq5la with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: random
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	model: create_standard_network_2
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:01<00:00, 5.26MB/s]


Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 152kB/s]


Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:01<00:00, 1.45MB/s]


Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 2.92MB/s]


Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw

Epoch 1, Loss: 2.3106267339245403, Accuracy: 10.50925925925926%
Epoch 2, Loss: 2.3095635893220585, Accuracy: 10.272222222222222%
Epoch 3, Loss: 2.309007144087299, Accuracy: 10.59074074074074%
Epoch 4, Loss: 2.3072172934410133, Accuracy: 10.707407407407407%
Epoch 5, Loss: 2.306970873432702, Accuracy: 10.616666666666667%
Epoch 6, Loss: 2.3057013447815775, Accuracy: 11.03888888888889%
Epoch 7, Loss: 2.3047259749394455, Accuracy: 11.00925925925926%
Epoch 8, Loss: 2.30427934801409, Accuracy: 10.91851851851852%
Epoch 9, Loss: 2.3035729874931805, Accuracy: 11.012962962962963%
Epoch 10, Loss: 2.3020088279981747, Accuracy: 11.298148148148147%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▃▁▃▄▃▆▆▅▆█
train_loss,█▇▇▅▅▄▃▃▂▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,11.29815
train_loss,2.30201
trial_name,m_create_standard_ne...
validation_accuracy,10.91667


[34m[1mwandb[0m: Agent Starting Run: p94e56ji with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: dropblock
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	model: create_standard_network_1
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.3031999406656385, Accuracy: 10.62962962962963%
Epoch 2, Loss: 2.3030581592948516, Accuracy: 10.655555555555555%
Epoch 3, Loss: 2.3028893083757698, Accuracy: 10.829629629629629%
Epoch 4, Loss: 2.3029235169220876, Accuracy: 10.931481481481482%
Epoch 5, Loss: 2.3030373310025833, Accuracy: 10.692592592592593%
Epoch 6, Loss: 2.3027734796017834, Accuracy: 10.8%
Epoch 7, Loss: 2.3026011583364405, Accuracy: 10.940740740740742%
Epoch 8, Loss: 2.302071382649137, Accuracy: 11.12962962962963%
Epoch 9, Loss: 2.3022544787958337, Accuracy: 10.855555555555556%
Epoch 10, Loss: 2.302238217744782, Accuracy: 11.044444444444444%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▄▅▂▃▅█▄▇
train_loss,█▇▆▆▇▅▄▁▂▂
validation_accuracy,▁

0,1
epoch,10
train_accuracy,11.04444
train_loss,2.30224
trial_name,m_create_standard_ne...
validation_accuracy,11.75


[34m[1mwandb[0m: Agent Starting Run: rx06qchq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: random
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	model: create_dropout_network_relu
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.2852392380271476, Accuracy: 21.694444444444443%
Epoch 2, Loss: 1.416105817208923, Accuracy: 53.68703703703704%
Epoch 3, Loss: 0.574048833471339, Accuracy: 82.33703703703704%
Epoch 4, Loss: 0.40803594482947864, Accuracy: 87.87777777777778%
Epoch 5, Loss: 0.3271725147397597, Accuracy: 90.56111111111112%
Epoch 6, Loss: 0.27143678877290817, Accuracy: 91.95925925925926%
Epoch 7, Loss: 0.23015742094843025, Accuracy: 93.17962962962963%
Epoch 8, Loss: 0.19987236142546927, Accuracy: 94.07407407407408%
Epoch 9, Loss: 0.1781525617284409, Accuracy: 94.75185185185185%
Epoch 10, Loss: 0.16138505093726854, Accuracy: 95.27407407407408%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▇▇██████
train_loss,█▅▂▂▂▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,95.27407
train_loss,0.16139
trial_name,m_create_dropout_net...
validation_accuracy,95.81667


[34m[1mwandb[0m: Agent Starting Run: 9jxjxwf9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: maxdropout
[34m[1mwandb[0m: 	dropout_prob: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	model: create_standard_network_2
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.3144466843085265, Accuracy: 10.58148148148148%
Epoch 2, Loss: 2.3067024460336043, Accuracy: 11.514814814814814%
Epoch 3, Loss: 2.292291136432033, Accuracy: 13.187037037037037%
Epoch 4, Loss: 2.235943135209558, Accuracy: 18.54074074074074%
Epoch 5, Loss: 1.9771226109204134, Accuracy: 31.746296296296297%
Epoch 6, Loss: 1.5355960661766088, Accuracy: 45.20925925925926%
Epoch 7, Loss: 1.2250512471131239, Accuracy: 56.105555555555554%
Epoch 8, Loss: 1.023503540322114, Accuracy: 63.56481481481482%
Epoch 9, Loss: 0.9123693003332446, Accuracy: 68.35740740740741%
Epoch 10, Loss: 0.834683044438396, Accuracy: 71.52962962962962%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▁▂▃▅▆▇██
train_loss,████▆▄▃▂▁▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,71.52963
train_loss,0.83468
trial_name,m_create_standard_ne...
validation_accuracy,77.7


[34m[1mwandb[0m: Agent Starting Run: utvy21jc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: biased_dropout
[34m[1mwandb[0m: 	dropout_prob: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	model: create_standard_network_2
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.3105598244621857, Accuracy: 10.253703703703703%
Epoch 2, Loss: 2.3093936138243474, Accuracy: 10.562962962962963%
Epoch 3, Loss: 2.308897952897854, Accuracy: 10.451851851851853%
Epoch 4, Loss: 2.3070224927499963, Accuracy: 10.714814814814815%
Epoch 5, Loss: 2.3070722211028727, Accuracy: 10.62037037037037%
Epoch 6, Loss: 2.3063156186686875, Accuracy: 10.942592592592593%
Epoch 7, Loss: 2.3055620255628466, Accuracy: 11.07037037037037%
Epoch 8, Loss: 2.3044907148415446, Accuracy: 11.166666666666666%
Epoch 9, Loss: 2.3024540676324854, Accuracy: 11.227777777777778%
Epoch 10, Loss: 2.302882396779354, Accuracy: 11.244444444444444%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▂▄▄▆▇▇██
train_loss,█▇▇▅▅▄▄▃▁▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,11.24444
train_loss,2.30288
trial_name,m_create_standard_ne...
validation_accuracy,11.06667


[34m[1mwandb[0m: Agent Starting Run: 8qgevo2y with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: random
[34m[1mwandb[0m: 	dropout_prob: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	model: create_dropout_network_logistic
[34m[1mwandb[0m: 	momentum: 0.99
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.323109934962756, Accuracy: 10.196296296296296%
Epoch 2, Loss: 2.3208652037014894, Accuracy: 10.042592592592593%
Epoch 3, Loss: 2.320201059370809, Accuracy: 10.042592592592593%
Epoch 4, Loss: 2.319742131007226, Accuracy: 10.107407407407408%
Epoch 5, Loss: 2.317587066600673, Accuracy: 10.181481481481482%
Epoch 6, Loss: 2.3163350526755453, Accuracy: 10.424074074074074%
Epoch 7, Loss: 2.316274464412888, Accuracy: 10.248148148148148%
Epoch 8, Loss: 2.315778323541885, Accuracy: 10.414814814814815%
Epoch 9, Loss: 2.314827937367968, Accuracy: 10.298148148148147%
Epoch 10, Loss: 2.3139970376028267, Accuracy: 10.34074074074074%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▄▁▁▂▄█▅█▆▆
train_loss,█▆▆▅▄▃▃▂▂▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,10.34074
train_loss,2.314
trial_name,m_create_dropout_net...
validation_accuracy,10.65


[34m[1mwandb[0m: Agent Starting Run: uz4h0mlj with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: biased_dropout
[34m[1mwandb[0m: 	dropout_prob: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	model: create_standard_network_2
[34m[1mwandb[0m: 	momentum: 0.99
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.3095516616134284, Accuracy: 10.531481481481482%
Epoch 2, Loss: 2.3087543178508634, Accuracy: 10.464814814814815%
Epoch 3, Loss: 2.3089560021721356, Accuracy: 10.47037037037037%
Epoch 4, Loss: 2.307067565160905, Accuracy: 10.625925925925927%
Epoch 5, Loss: 2.3064717887702146, Accuracy: 10.84074074074074%
Epoch 6, Loss: 2.3051281980993625, Accuracy: 10.948148148148148%
Epoch 7, Loss: 2.3047177093853883, Accuracy: 10.925925925925926%
Epoch 8, Loss: 2.303856722833986, Accuracy: 11.12037037037037%
Epoch 9, Loss: 2.3038409903151162, Accuracy: 11.14074074074074%
Epoch 10, Loss: 2.3028115534104443, Accuracy: 11.24074074074074%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▂▁▁▂▄▅▅▇▇█
train_loss,█▇▇▅▅▃▃▂▂▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,11.24074
train_loss,2.30281
trial_name,m_create_standard_ne...
validation_accuracy,10.88333


[34m[1mwandb[0m: Agent Starting Run: do1qwj1e with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: biased_dropout
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	model: create_standard_network_2
[34m[1mwandb[0m: 	momentum: 0.99
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.314686783399627, Accuracy: 10.540740740740741%
Epoch 2, Loss: 2.3071049796461494, Accuracy: 11.35925925925926%
Epoch 3, Loss: 2.2906996184050756, Accuracy: 13.174074074074074%
Epoch 4, Loss: 2.2325829595186133, Accuracy: 18.62037037037037%
Epoch 5, Loss: 1.9565880208218833, Accuracy: 31.714814814814815%
Epoch 6, Loss: 1.5297392277638495, Accuracy: 44.861111111111114%
Epoch 7, Loss: 1.2335832075202635, Accuracy: 55.81111111111111%
Epoch 8, Loss: 1.0304274416365329, Accuracy: 63.407407407407405%
Epoch 9, Loss: 0.9175385637164681, Accuracy: 68.22962962962963%
Epoch 10, Loss: 0.8437717124042918, Accuracy: 71.37037037037037%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▁▂▃▅▆▇██
train_loss,████▆▄▃▂▁▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,71.37037
train_loss,0.84377
trial_name,m_create_standard_ne...
validation_accuracy,76.4


[34m[1mwandb[0m: Agent Starting Run: yq0fgd1d with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: flipover
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	model: create_dropout_network_relu
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.302140318669414, Accuracy: 11.798148148148147%
Epoch 2, Loss: 2.3003498273438185, Accuracy: 15.222222222222221%
Epoch 3, Loss: 2.2985737035624787, Accuracy: 15.827777777777778%
Epoch 4, Loss: 2.296463081904497, Accuracy: 16.514814814814816%
Epoch 5, Loss: 2.2939558684543413, Accuracy: 18.48148148148148%
Epoch 6, Loss: 2.2905879585663853, Accuracy: 21.053703703703704%
Epoch 7, Loss: 2.2856873135431117, Accuracy: 24.37037037037037%
Epoch 8, Loss: 2.27824674652651, Accuracy: 25.751851851851853%
Epoch 9, Loss: 2.265034979271098, Accuracy: 25.192592592592593%
Epoch 10, Loss: 2.2397502697474585, Accuracy: 23.774074074074075%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▃▃▄▆▇██▇
train_loss,███▇▇▇▆▅▄▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,23.77407
train_loss,2.23975
trial_name,m_create_dropout_net...
validation_accuracy,28.48333


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5p8egpky with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: maxdropout
[34m[1mwandb[0m: 	dropout_prob: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	model: create_dropout_network_logistic
[34m[1mwandb[0m: 	momentum: 0.99
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.322178803348993, Accuracy: 10.2%
Epoch 2, Loss: 2.3213957511418237, Accuracy: 10.268518518518519%
Epoch 3, Loss: 2.319338659539607, Accuracy: 10.162962962962963%
Epoch 4, Loss: 2.318204434279582, Accuracy: 10.116666666666667%
Epoch 5, Loss: 2.3172467257739244, Accuracy: 10.444444444444445%
Epoch 6, Loss: 2.3162091142758374, Accuracy: 10.292592592592593%
Epoch 7, Loss: 2.317132752088574, Accuracy: 10.201851851851853%
Epoch 8, Loss: 2.314833129186766, Accuracy: 10.1%
Epoch 9, Loss: 2.3142942602600534, Accuracy: 10.394444444444444%
Epoch 10, Loss: 2.314697989355331, Accuracy: 10.162962962962963%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▃▄▂▁█▅▃▁▇▂
train_loss,█▇▅▄▄▃▄▁▁▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,10.16296
train_loss,2.3147
trial_name,m_create_dropout_net...
validation_accuracy,11.51667


[34m[1mwandb[0m: Agent Starting Run: 7jzym7q5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: maxdropout
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	model: create_standard_network_1
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.3035479120168643, Accuracy: 10.768518518518519%
Epoch 2, Loss: 2.302786775109892, Accuracy: 11.003703703703703%
Epoch 3, Loss: 2.3029743605315405, Accuracy: 10.887037037037038%
Epoch 4, Loss: 2.3029153338540786, Accuracy: 10.901851851851852%
Epoch 5, Loss: 2.302995502948761, Accuracy: 10.97962962962963%
Epoch 6, Loss: 2.3026437287647012, Accuracy: 10.983333333333333%
Epoch 7, Loss: 2.3024790496057808, Accuracy: 10.97962962962963%
Epoch 8, Loss: 2.302218095386198, Accuracy: 11.098148148148148%
Epoch 9, Loss: 2.3021446613338887, Accuracy: 10.868518518518519%
Epoch 10, Loss: 2.3024215068297362, Accuracy: 11.02037037037037%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▄▄▅▆▅█▃▆
train_loss,█▄▅▅▅▃▃▁▁▂
validation_accuracy,▁

0,1
epoch,10
train_accuracy,11.02037
train_loss,2.30242
trial_name,m_create_standard_ne...
validation_accuracy,10.68333


[34m[1mwandb[0m: Agent Starting Run: q85mqcme with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: biased_dropout
[34m[1mwandb[0m: 	dropout_prob: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	model: create_standard_network_1
[34m[1mwandb[0m: 	momentum: 0.99
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.3029085680206807, Accuracy: 10.670370370370371%
Epoch 2, Loss: 2.3029544955181285, Accuracy: 10.829629629629629%
Epoch 3, Loss: 2.302960321519047, Accuracy: 10.694444444444445%
Epoch 4, Loss: 2.302467646474522, Accuracy: 11.018518518518519%
Epoch 5, Loss: 2.3027527880329655, Accuracy: 10.951851851851853%
Epoch 6, Loss: 2.3028164446636397, Accuracy: 10.957407407407407%
Epoch 7, Loss: 2.3022458996817963, Accuracy: 10.907407407407407%
Epoch 8, Loss: 2.3026644194860593, Accuracy: 10.907407407407407%
Epoch 9, Loss: 2.301896325219864, Accuracy: 11.107407407407408%
Epoch 10, Loss: 2.301961244000078, Accuracy: 11.148148148148149%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▁▆▅▅▄▄▇█
train_loss,███▅▇▇▃▆▁▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,11.14815
train_loss,2.30196
trial_name,m_create_standard_ne...
validation_accuracy,10.73333


[34m[1mwandb[0m: Agent Starting Run: 04xsmxzc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: biased_dropout
[34m[1mwandb[0m: 	dropout_prob: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	model: create_dropout_network_logistic
[34m[1mwandb[0m: 	momentum: 0.99
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.321923885865234, Accuracy: 10.464814814814815%
Epoch 2, Loss: 2.321089148238937, Accuracy: 10.366666666666667%
Epoch 3, Loss: 2.3189339383518526, Accuracy: 10.512962962962963%
Epoch 4, Loss: 2.3188130844260844, Accuracy: 10.105555555555556%
Epoch 5, Loss: 2.318038048337421, Accuracy: 10.248148148148148%
Epoch 6, Loss: 2.315951153565357, Accuracy: 10.257407407407408%
Epoch 7, Loss: 2.3153448627458366, Accuracy: 10.401851851851852%
Epoch 8, Loss: 2.314873009778877, Accuracy: 10.357407407407408%
Epoch 9, Loss: 2.314141500900142, Accuracy: 10.290740740740741%
Epoch 10, Loss: 2.314305024689408, Accuracy: 10.32037037037037%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▇▅█▁▃▄▆▅▄▅
train_loss,█▇▅▅▅▃▂▂▁▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,10.32037
train_loss,2.31431
trial_name,m_create_dropout_net...
validation_accuracy,11.06667


[34m[1mwandb[0m: Agent Starting Run: d7m80ysq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: dropblock
[34m[1mwandb[0m: 	dropout_prob: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	model: create_dropout_network_relu
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.301475048912645, Accuracy: 12.175925925925926%
Epoch 2, Loss: 2.299672288917252, Accuracy: 16.844444444444445%
Epoch 3, Loss: 2.2978957295417786, Accuracy: 19.353703703703705%
Epoch 4, Loss: 2.295639655883843, Accuracy: 20.733333333333334%
Epoch 5, Loss: 2.292934204447326, Accuracy: 22.57037037037037%
Epoch 6, Loss: 2.2890873412950343, Accuracy: 26.355555555555554%
Epoch 7, Loss: 2.28374356737634, Accuracy: 29.83888888888889%
Epoch 8, Loss: 2.275112891649183, Accuracy: 31.66296296296296%
Epoch 9, Loss: 2.2599542883900106, Accuracy: 29.522222222222222%
Epoch 10, Loss: 2.230000697888469, Accuracy: 26.17037037037037%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▄▅▆▇█▇▆
train_loss,███▇▇▇▆▅▄▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,26.17037
train_loss,2.23
trial_name,m_create_dropout_net...
validation_accuracy,28.25


[34m[1mwandb[0m: Agent Starting Run: a0wkmdjx with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: flipover
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	model: create_standard_network_2
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Epoch 1, Loss: 2.3109447360038757, Accuracy: 10.325925925925926%
Epoch 2, Loss: 2.3102651020926888, Accuracy: 10.548148148148147%
Epoch 3, Loss: 2.3087392310960597, Accuracy: 10.425925925925926%
Epoch 4, Loss: 2.307606799342621, Accuracy: 10.531481481481482%
Epoch 5, Loss: 2.3064858306075724, Accuracy: 10.9%
Epoch 6, Loss: 2.30623351199932, Accuracy: 10.75%
Epoch 7, Loss: 2.3044260652709347, Accuracy: 11.133333333333333%
Epoch 8, Loss: 2.3052413008789316, Accuracy: 10.953703703703704%
Epoch 9, Loss: 2.304041623504241, Accuracy: 10.985185185185186%
Epoch 10, Loss: 2.3030125445099237, Accuracy: 11.337037037037037%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▂▂▅▄▇▅▆█
train_loss,█▇▆▅▄▄▂▃▂▁
validation_accuracy,▁

0,1
epoch,10
train_accuracy,11.33704
train_loss,2.30301
trial_name,m_create_standard_ne...
validation_accuracy,11.86667


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import wandb
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset building function
def build_dataset(batch_size):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize for CIFAR-10
    ])

    dataset = datasets.CIFAR10(root='.', train=True, download=True, transform=transform)

    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

# Neural Network Architecture with Dropout
class NeuralNetworkWithDropout(nn.Module):
    def __init__(self, input_size, hidden_layers, output_size, dropout_prob, activation_fn, init_method, max_threshold=None):
        super(NeuralNetworkWithDropout, self).__init__()
        self.init_method = init_method
        self.max_threshold = max_threshold

        layers = []

        # Input to first hidden layer
        layers.append(nn.Linear(input_size, hidden_layers[0]))
        layers.append(activation_fn())
        layers.append(nn.Dropout(dropout_prob))

        # Hidden layers with dropout
        for i in range(1, len(hidden_layers)):
            layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i]))
            layers.append(activation_fn())
            layers.append(nn.Dropout(dropout_prob))

        # Output layer
        layers.append(nn.Linear(hidden_layers[-1], output_size))

        self.network = nn.Sequential(*layers)
        self.apply(self._initialize_weights)

    def forward(self, x):
      # Flatten the input tensor
      x = x.view(x.size(0), -1)  # Flatten the 28x28 images into a 784 vector
      return self.network(x)


    def _initialize_weights(self, layer):
        if isinstance(layer, nn.Linear):
            if self.init_method == 'random':
                nn.init.normal_(layer.weight, mean=0, std=0.01)
            elif self.init_method == 'max_threshold':
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                if self.max_threshold:
                    torch.clamp(layer.weight, max=self.max_threshold)
            elif self.init_method == 'pretraining':
                nn.init.normal_(layer.weight, mean=0, std=0.01)

            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0)


# Experiment Configurations

# 1. StandardNeuralNet Logistic 2 layers, 100 units
def create_standard_network_1():
    return NeuralNetworkWithDropout(input_size=784, hidden_layers=[100, 100], output_size=10,
                                    dropout_prob=0.5, activation_fn=nn.Sigmoid, init_method='random')

# 2. StandardNeuralNet Logistic 2 layers, 800 units
def create_standard_network_2():
    return NeuralNetworkWithDropout(input_size=784, hidden_layers=[800, 800], output_size=10,
                                    dropout_prob=0.5, activation_fn=nn.Sigmoid, init_method='random')

# 3. DropoutNN Logistic 3 layers, 1024 units
def create_dropout_network_logistic():
    return NeuralNetworkWithDropout(input_size=784, hidden_layers=[1024, 1024, 1024], output_size=10,
                                    dropout_prob=0.5, activation_fn=nn.Sigmoid, init_method='random')

# 4. DropoutNN ReLU 3 layers, 1024 units
def create_dropout_network_relu():
    return NeuralNetworkWithDropout(input_size=784, hidden_layers=[1024, 1024, 1024], output_size=10,
                                    dropout_prob=0.5, activation_fn=nn.ReLU, init_method='random')


# DROPOUTS

# Define custom DropConnect layer
class DropConnect(nn.Module):
    def __init__(self, layer, p=0.5):
        super(DropConnect, self).__init__()
        self.layer = layer
        self.p = p

    def forward(self, x):
        if self.training:
            # DropConnect: Randomly zero out weights, not activations
            mask = (torch.rand_like(self.layer.weight) > self.p).float()
            weight = self.layer.weight * mask
            return F.linear(x, weight, self.layer.bias)
        else:
            return self.layer(x)

# Define custom DropBlock layer
class DropBlock(nn.Module):
    def __init__(self, p=0.5):
        super(DropBlock, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training:
            # DropBlock: Randomly block entire blocks of activations
            block_size = int(x.size(1) * self.p)
            mask = torch.ones_like(x)
            mask[:, :block_size] = 0  # You can modify this logic to randomly block in more advanced ways
            x = x * mask
        return x

# Define Maxdropout (drop the largest activations)
class MaxDropout(nn.Module):
    def __init__(self, p=0.5):
        super(MaxDropout, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training:
            # Drop the max activations
            top_k = int(x.size(1) * self.p)
            _, indices = torch.topk(x, top_k, dim=1, largest=True, sorted=False)
            mask = torch.zeros_like(x)
            mask.scatter_(1, indices, 1)
            x = x * mask
        return x

# Define Biased Dropout
class BiasedDropout(nn.Module):
    def __init__(self, p=0.5, bias=0.2):
        super(BiasedDropout, self).__init__()
        self.p = p
        self.bias = bias

    def forward(self, x):
        if self.training:
            # Biased Dropout: Apply biased dropout, where some neurons are more likely to be dropped
            prob = torch.full_like(x, self.p + self.bias)
            mask = (torch.rand_like(x) > prob).float()
            x = x * mask
        return x

# Define Flipover Dropout
class FlipoverDropout(nn.Module):
    def __init__(self, p=0.5):
        super(FlipoverDropout, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training:
            # Flipover: Randomly negate the activations of dropped units
            mask = (torch.rand_like(x) > self.p).float()
            x = x * mask
            x = x - (x * mask)  # Negate the dropped values
        return x

# Main function to apply different dropout methods
def apply_dropout_method(model, method_name, dropout_prob=0.5):
    if method_name == "random":
        # Apply standard random dropout to each layer
        for module in model.children():
            if isinstance(module, nn.Linear):
                module.dropout = nn.Dropout(dropout_prob)
        return model

    if method_name == "dropconnect":
        # Apply DropConnect
        for module in model.children():
            if isinstance(module, nn.Linear):
                module = DropConnect(module, p=dropout_prob)
        return model

    if method_name == "dropblock":
        # Apply DropBlock
        for module in model.children():
            if isinstance(module, nn.Linear):
                module = DropBlock(p=dropout_prob)
        return model

    if method_name == "maxdropout":
        # Apply Maxdropout
        for module in model.children():
            if isinstance(module, nn.Linear):
                module = MaxDropout(p=dropout_prob)
        return model

    if method_name == "biased_dropout":
        # Apply Biased Dropout
        for module in model.children():
            if isinstance(module, nn.Linear):
                module = BiasedDropout(p=dropout_prob)
        return model

    if method_name == "flipover":
        # Apply Flipover Dropout
        for module in model.children():
            if isinstance(module, nn.Linear):
                module = FlipoverDropout(p=dropout_prob)
        return model

    # Default: no dropout
    return model


# Optimizer function
def get_optimizer(model, optimizer_name, lr, momentum=0, weight_decay=0):
    if optimizer_name == 'sgd':
        return optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    else:
        raise ValueError("Optimizer not supported")


# Training function
def train(model, train_loader, optimizer, criterion, epochs):
        config = wandb.config

        model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            correct = 0
            total = 0
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            wandb.log({
                "epoch": epoch + 1,
                "train_loss": running_loss / len(train_loader),
                "train_accuracy": 100 * correct / total,
                "trial_name": f"m_{config.model}_dr_{config.dropout_method}_p_{config.dropout_prob}lr_{config.lr}_m_{config.momentum}"  # Add trial name
            })

            print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}, Accuracy: {100 * correct / total}%")

# Evaluation function
def evaluate(model, val_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

import wandb

def run_experiment():
    # Initialize a new wandb run
    with wandb.init() as run:
        # Access the sweep config from wandb
        config = wandb.config

        # Generate a custom trial name using hyperparameters from the config
        trial_name = f"m_{config.model}_dr_{config.dropout_method}_p_{config.dropout_prob}lr_{config.lr}_m_{config.momentum}"

        run.name = trial_name

        # Build dataset for training and validation
        train_loader, val_loader = build_dataset(config.batch_size)

        # Choose model based on config
        if config.model == 'create_standard_network_1':
            model = create_standard_network_1().to(device)
        elif config.model == 'create_standard_network_2':
            model = create_standard_network_2().to(device)
        elif config.model == 'create_dropout_network_logistic':
            model = create_dropout_network_logistic().to(device)
        elif config.model == 'create_dropout_network_relu':
            model = create_dropout_network_relu().to(device)
        else:
            raise ValueError(f"Unknown model: {config.model}")

        # Apply the selected dropout method
        model = apply_dropout_method(model, config.dropout_method, dropout_prob=config.dropout_prob)

        # Define loss and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = get_optimizer(model, config.optimizer, config.lr)

        # Train the model
        train(model, train_loader, optimizer, criterion, config.epochs)

        # Evaluate the model
        val_accuracy = evaluate(model, val_loader)
        wandb.log({"validation_accuracy": val_accuracy, "trial_name": trial_name})

# Run the sweep
wandb.agent(sweep_id, run_experiment,count=15)

[34m[1mwandb[0m: Agent Starting Run: lbtn3ebl with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_method: random
[34m[1mwandb[0m: 	dropout_prob: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	model: create_dropout_network_logistic
[34m[1mwandb[0m: 	momentum: 0.99
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_init: random


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:11<00:00, 14.6MB/s]


Extracting ./cifar-10-python.tar.gz to .


Traceback (most recent call last):
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/loca

Run lbtn3ebl errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py",

Files already downloaded and verified


Traceback (most recent call last):
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/loca

Run 1hwdknr5 errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py",

Files already downloaded and verified


Traceback (most recent call last):
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/loca

Run yijngqcr errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py",

Files already downloaded and verified


Traceback (most recent call last):
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/loca

Run u6hc3d92 errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py",

Files already downloaded and verified


Traceback (most recent call last):
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/loca

Run 7ef9golk errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py",

Files already downloaded and verified


Traceback (most recent call last):
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/loca

Run ydkq8k6m errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "<ipython-input-7-dd308e14cb39>", line 319, in run_experiment
    train(model, train_loader, optimizer, criterion, config.epochs)
  File "<ipython-input-7-dd308e14cb39>", line 248, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-7-dd308e14cb39>", line 59, in forward
    return self.network(x)
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py",