In [1]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm import tqdm
import wandb

In [None]:
# # Create a pytorch dataset of images
# dataset_path = 'datasets/face_mask_detection'

# # Calculate dataset mean and std for normalization
# def calculate_mean_std(dataset_path):
#     sum_mean = np.zeros(3, dtype=np.float64)
#     sum_std = np.zeros(3, dtype=np.float64)
#     count = 0
#     class_names = ['without_mask', 'with_mask']
    
#     print("Calculating dataset mean and std...")
#     for class_name in class_names:
#         class_dir = os.path.join(dataset_path, class_name)
#         image_paths = [os.path.join(class_dir, img) for img in os.listdir(class_dir)]
        
#         # Process images in batches with tqdm
#         for img_path in tqdm(image_paths, desc=f"Processing {class_name}"):
#             # Read image in grayscale first to check if it's valid
#             if not os.path.exists(img_path):
#                 continue
                
#             img = cv2.imread(img_path)
#             if img is None:
#                 continue
#             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#             img = cv2.resize(img, (128, 128))
#             img = img.astype(np.float32) / 255.0
            
#             # Update running statistics
#             img_mean = img.mean(axis=(0, 1))
#             img_std = img.std(axis=(0, 1))
            
#             sum_mean += img_mean
#             sum_std += img_std
#             count += 1
    
#     mean = sum_mean / count
#     std = sum_std / count
    
#     return mean.astype(np.float32), std.astype(np.float32)

# # Calculate mean and std for the dataset
# mean, std = calculate_mean_std(dataset_path)
# print(f"Dataset mean: {mean}")
# print(f"Dataset std: {std}")

Calculating dataset mean and std...


Processing without_mask: 100%|██████████| 1930/1930 [00:01<00:00, 1071.87it/s]
Processing with_mask: 100%|██████████| 2165/2165 [00:04<00:00, 454.78it/s]

Dataset mean: [0.5748376  0.49752444 0.46703878]
Dataset std: [0.25625145 0.24203679 0.23397043]





In [2]:
dataset_path = 'datasets/face_mask_detection'

# Create a pytorch dataset
class FaceMaskDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []
        self.class_names = ['without_mask', 'with_mask']
        for i, class_name in enumerate(self.class_names):
            class_dir = os.path.join(self.root_dir, class_name)
            for image_name in os.listdir(class_dir):
                image_path = os.path.join(class_dir, image_name)
                self.images.append(image_path)  # Store path instead of loaded image
                self.labels.append(i)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image_path = self.images[idx]
        image = cv2.imread(image_path)
        # Convert BGR to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Resize to match training size
        image = cv2.resize(image, (128, 128))
        # Convert to tensor first
        image = image.astype(np.float32) / 255.0
        image = torch.from_numpy(image).permute(2, 0, 1)  # HWC to CHW
        
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
            
        return image, label
    
# Create a pytorch dataloader
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.Normalize([0.5748376, 0.49752444, 0.46703878], [0.25625145, 0.24203679, 0.23397043])
])

dataset = FaceMaskDataset(dataset_path, transform=transform)

In [3]:
# Split the dataset into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=16, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=16, pin_memory=True)

In [4]:
# Create a highly configurable CNN model
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, 
                 padding=1, pool_type='max', pool_size=2, pool_stride=2, 
                 activation='relu', batch_norm=False):
        super(ConvBlock, self).__init__()
        
        # Conv layer
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        
        # Batch normalization (optional)
        self.batch_norm = None
        if batch_norm:
            self.batch_norm = nn.BatchNorm2d(out_channels)
        
        # Activation function
        self.activation_type = activation
        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'leaky_relu':
            self.activation = nn.LeakyReLU(0.1)
        elif activation == 'elu':
            self.activation = nn.ELU()
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        elif activation == 'sigmoid':
            self.activation = nn.Sigmoid()
        
        # Pooling layer
        self.pool = None
        if pool_type == 'max':
            self.pool = nn.MaxPool2d(pool_size, pool_stride)
        elif pool_type == 'avg':
            self.pool = nn.AvgPool2d(pool_size, pool_stride)

    def forward(self, x):
        x = self.conv(x)
        if self.batch_norm:
            x = self.batch_norm(x)
        x = self.activation(x)
        if self.pool:
            x = self.pool(x)
        return x

class CNN(nn.Module):
    def __init__(self, in_channels=3, num_classes=2, 
                 conv_channels=[16, 32, 64], # List of channels for each conv layer
                 kernel_sizes=[3, 3, 3],     # Kernel sizes for each conv layer
                 pool_types=['max', 'max', 'max'],   # Pooling type for each layer
                 pool_sizes=[2, 2, 2],       # Pool sizes for each layer
                 activations=['relu', 'relu', 'relu'],  # Activation functions
                 use_batch_norm=False,       # Whether to use batch normalization
                 fc_sizes=[128],             # Fully connected layer sizes
                 dropout_rates=[0.5],        # Dropout rates for fc layers
                 final_pool_size=(4, 4)):    # Final adaptive pooling size
                 
        super(CNN, self).__init__()
        
        # Ensure all parameter lists have the same length as conv_channels
        num_conv_layers = len(conv_channels)
        kernel_sizes = self._extend_param(kernel_sizes, num_conv_layers)
        pool_types = self._extend_param(pool_types, num_conv_layers)
        pool_sizes = self._extend_param(pool_sizes, num_conv_layers)
        activations = self._extend_param(activations, num_conv_layers)
        
        # Create convolutional blocks
        self.conv_blocks = nn.ModuleList()
        current_channels = in_channels
        
        for i in range(num_conv_layers):
            self.conv_blocks.append(
                ConvBlock(
                    in_channels=current_channels,
                    out_channels=conv_channels[i],
                    kernel_size=kernel_sizes[i],
                    pool_type=pool_types[i],
                    pool_size=pool_sizes[i],
                    activation=activations[i],
                    batch_norm=use_batch_norm
                )
            )
            current_channels = conv_channels[i]
        
        # Final adaptive pooling layer
        self.adaptive_pool = nn.AdaptiveAvgPool2d(final_pool_size)
        
        # Calculate the flattened size after conv and pooling
        self.flattened_size = conv_channels[-1] * final_pool_size[0] * final_pool_size[1]
        
        # Create fully connected layers
        self.fc_layers = nn.ModuleList()
        self.dropout_layers = nn.ModuleList()
        
        current_size = self.flattened_size
        for i, fc_size in enumerate(fc_sizes):
            self.fc_layers.append(nn.Linear(current_size, fc_size))
            self.dropout_layers.append(nn.Dropout(dropout_rates[min(i, len(dropout_rates)-1)]))
            current_size = fc_size
            
        # Final classification layer
        self.final_fc = nn.Linear(current_size, num_classes)
        
        # Save configuration for forward pass
        self.final_pool_size = final_pool_size
        self.num_conv_layers = num_conv_layers
        self.fc_sizes = fc_sizes
        self.activations = activations

    def _extend_param(self, param_list, target_length):
        """Extend parameter list to the target length by repeating the last element"""
        if len(param_list) >= target_length:
            return param_list[:target_length]
        else:
            return param_list + [param_list[-1]] * (target_length - len(param_list))

    def forward(self, x):
        # Pass through all convolutional blocks
        for block in self.conv_blocks:
            x = block(x)
        
        # Apply adaptive pooling
        x = self.adaptive_pool(x)
        
        # Flatten
        x = x.view(-1, self.flattened_size)
        
        # Fully connected layers with activation and dropout
        for i, (fc_layer, dropout_layer) in enumerate(zip(self.fc_layers, self.dropout_layers)):
            x = fc_layer(x)
            # Apply the same activation function as specified for the last conv layer
            if self.activations[-1] == 'relu':
                x = torch.relu(x)
            elif self.activations[-1] == 'leaky_relu':
                x = torch.nn.functional.leaky_relu(x, 0.1)
            elif self.activations[-1] == 'elu':
                x = torch.nn.functional.elu(x)
            elif self.activations[-1] == 'tanh':
                x = torch.tanh(x)
            elif self.activations[-1] == 'sigmoid':
                x = torch.sigmoid(x)
            x = dropout_layer(x)
        
        # Final classification layer
        x = self.final_fc(x)
        return x

In [5]:
sweep_config = {
    'method': 'bayes',  # Bayesian optimization, alternatives: 'grid', 'random'
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'learning_rate': {
            'min': 0.0001,
            'max': 0.01
        },
        'batch_size': {
            'values': [16, 32, 64, 128]
        },
        'conv_channels': {
            'values': [
                [16, 32, 64], 
                [32, 64, 128],
                [16, 32, 64, 128]
            ]
        },
        'activations': {
            'values': [
                ['relu', 'relu', 'relu'],
                ['leaky_relu', 'leaky_relu', 'leaky_relu'],
            ]
        },
        'fc_sizes': {
            'values': [
                [128],
                [256],
                [128, 64],
                [256, 128]
            ]
        },
        'dropout_rates': {
            'values': [
                [0.0],
                [0.3],
                [0.5],
            ]
        },
        'use_batch_norm': {
            'values': [True, False]
        },
        'optimizer': {
            'values': ['adam', 'adamw']
        }
    }
}

In [6]:
def train_model():
    # Initialize a wandb run
    run = wandb.init()
    
    # Access your hyperparameters from the run config
    config = wandb.config
    
    # Create your model with the hyperparameters
    model = CNN(
        in_channels=3,  # Assuming RGB images
        num_classes=2,  # Binary classification (mask/no mask)
        conv_channels=config.conv_channels,
        activations=config.activations,
        fc_sizes=config.fc_sizes,
        dropout_rates=config.dropout_rates,
        use_batch_norm=config.use_batch_norm
    )
    
    # Create datasets and dataloaders
    # (You'll need to replace this with your actual dataset setup)
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False)
    
    # Set up optimizer
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adamw':
        optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate)
    else:
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate, momentum=0.9)
    
    # Loss function
    criterion = nn.CrossEntropyLoss()
    
    # Training loop
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    num_epochs = 10  # You can also make this a hyperparameter
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        correct_train = 0
        total_train = 0
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total_train += labels.size(0)
            correct_train += predicted.eq(labels).sum().item()
        
        train_accuracy = correct_train / total_train
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total_val += labels.size(0)
                correct_val += predicted.eq(labels).sum().item()
        
        val_accuracy = correct_val / total_val
        
        # Log metrics to wandb
        wandb.log({
            'epoch': epoch,
            'train_loss': train_loss / len(train_loader),
            'train_accuracy': train_accuracy,
            'val_loss': val_loss / len(val_loader),
            'val_accuracy': val_accuracy
        })
        
        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Train Loss: {train_loss/len(train_loader):.4f}, "
              f"Train Acc: {train_accuracy:.4f}, "
              f"Val Loss: {val_loss/len(val_loader):.4f}, "
              f"Val Acc: {val_accuracy:.4f}")

In [7]:
# Initialize a sweep
sweep_id = wandb.sweep(sweep_config, project='face-mask-detection-vr-mini-project-1', entity='ritishtest1')

# Run the sweep
wandb.agent(sweep_id, function=train_model)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: uo9mdr7h
Sweep URL: https://wandb.ai/ritishtest1/face-mask-detection-vr-mini-project-1/sweeps/uo9mdr7h


[34m[1mwandb[0m: Agent Starting Run: e2iqoq5y with config:
[34m[1mwandb[0m: 	activations: ['relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_channels: [16, 32, 64]
[34m[1mwandb[0m: 	dropout_rates: [0]
[34m[1mwandb[0m: 	fc_sizes: [256]
[34m[1mwandb[0m: 	learning_rate: 0.002516890529200087
[34m[1mwandb[0m: 	optimizer: adamw
[34m[1mwandb[0m: 	use_batch_norm: True
[34m[1mwandb[0m: Currently logged in as: [33mritishshrirao[0m ([33mritishtest1[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch 1/10, Train Loss: 0.3583, Train Acc: 0.8626, Val Loss: 0.2449, Val Acc: 0.9206




Epoch 2/10, Train Loss: 0.2377, Train Acc: 0.9100, Val Loss: 0.2823, Val Acc: 0.8828




Epoch 3/10, Train Loss: 0.2099, Train Acc: 0.9197, Val Loss: 0.1670, Val Acc: 0.9402




Epoch 4/10, Train Loss: 0.2043, Train Acc: 0.9243, Val Loss: 0.2559, Val Acc: 0.8974




Epoch 5/10, Train Loss: 0.1782, Train Acc: 0.9322, Val Loss: 0.1609, Val Acc: 0.9451




Epoch 6/10, Train Loss: 0.1624, Train Acc: 0.9341, Val Loss: 0.1362, Val Acc: 0.9524




Epoch 7/10, Train Loss: 0.1634, Train Acc: 0.9405, Val Loss: 0.1581, Val Acc: 0.9414




Epoch 8/10, Train Loss: 0.1647, Train Acc: 0.9353, Val Loss: 0.1541, Val Acc: 0.9463




Epoch 9/10, Train Loss: 0.1488, Train Acc: 0.9423, Val Loss: 0.1265, Val Acc: 0.9573




Epoch 10/10, Train Loss: 0.1470, Train Acc: 0.9451, Val Loss: 0.1145, Val Acc: 0.9585


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▆▇▇█▇██
train_loss,█▄▃▃▂▂▂▂▁▁
val_accuracy,▅▁▆▂▇▇▆▇██
val_loss,▆█▃▇▃▂▃▃▁▁

0,1
epoch,9.0
train_accuracy,0.94505
train_loss,0.147
val_accuracy,0.95849
val_loss,0.11451


[34m[1mwandb[0m: Agent Starting Run: 6thvt3l0 with config:
[34m[1mwandb[0m: 	activations: ['relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_channels: [16, 32, 64, 128]
[34m[1mwandb[0m: 	dropout_rates: [0.3]
[34m[1mwandb[0m: 	fc_sizes: [128]
[34m[1mwandb[0m: 	learning_rate: 0.006740643842678123
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	use_batch_norm: True




Epoch 1/10, Train Loss: 0.6852, Train Acc: 0.7961, Val Loss: 0.3301, Val Acc: 0.8694




Epoch 2/10, Train Loss: 0.2720, Train Acc: 0.9029, Val Loss: 0.1805, Val Acc: 0.9341




Epoch 3/10, Train Loss: 0.2228, Train Acc: 0.9200, Val Loss: 0.2890, Val Acc: 0.8779




Epoch 4/10, Train Loss: 0.2221, Train Acc: 0.9206, Val Loss: 0.1765, Val Acc: 0.9377




Epoch 5/10, Train Loss: 0.1997, Train Acc: 0.9246, Val Loss: 0.1497, Val Acc: 0.9438




Epoch 6/10, Train Loss: 0.1973, Train Acc: 0.9283, Val Loss: 0.2342, Val Acc: 0.9133




Epoch 7/10, Train Loss: 0.1987, Train Acc: 0.9228, Val Loss: 0.1621, Val Acc: 0.9316




Epoch 8/10, Train Loss: 0.1816, Train Acc: 0.9325, Val Loss: 0.1538, Val Acc: 0.9499




Epoch 9/10, Train Loss: 0.1901, Train Acc: 0.9289, Val Loss: 0.2094, Val Acc: 0.9170




Epoch 10/10, Train Loss: 0.1462, Train Acc: 0.9460, Val Loss: 0.1236, Val Acc: 0.9536


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇▇▇▇▇▇█
train_loss,█▃▂▂▂▂▂▁▂▁
val_accuracy,▁▆▂▇▇▅▆█▅█
val_loss,█▃▇▃▂▅▂▂▄▁

0,1
epoch,9.0
train_accuracy,0.94597
train_loss,0.1462
val_accuracy,0.9536
val_loss,0.12365


[34m[1mwandb[0m: Agent Starting Run: zhhduoiz with config:
[34m[1mwandb[0m: 	activations: ['leaky_relu', 'leaky_relu', 'leaky_relu']
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_channels: [16, 32, 64]
[34m[1mwandb[0m: 	dropout_rates: [0.5]
[34m[1mwandb[0m: 	fc_sizes: [128, 64]
[34m[1mwandb[0m: 	learning_rate: 0.006250588547753344
[34m[1mwandb[0m: 	optimizer: adamw
[34m[1mwandb[0m: 	use_batch_norm: True




Epoch 1/10, Train Loss: 0.4382, Train Acc: 0.8309, Val Loss: 0.3331, Val Acc: 0.8864




Epoch 2/10, Train Loss: 0.3410, Train Acc: 0.8761, Val Loss: 0.3584, Val Acc: 0.8425




Epoch 3/10, Train Loss: 0.3084, Train Acc: 0.8825, Val Loss: 0.2708, Val Acc: 0.9035




Epoch 4/10, Train Loss: 0.2835, Train Acc: 0.9066, Val Loss: 0.2156, Val Acc: 0.9182




Epoch 5/10, Train Loss: 0.2538, Train Acc: 0.9066, Val Loss: 0.2149, Val Acc: 0.9145




Epoch 6/10, Train Loss: 0.2612, Train Acc: 0.9084, Val Loss: 0.2160, Val Acc: 0.9292




Epoch 7/10, Train Loss: 0.2573, Train Acc: 0.9072, Val Loss: 0.1625, Val Acc: 0.9402




Epoch 8/10, Train Loss: 0.2430, Train Acc: 0.9176, Val Loss: 0.2380, Val Acc: 0.9072




Epoch 9/10, Train Loss: 0.2339, Train Acc: 0.9164, Val Loss: 0.1542, Val Acc: 0.9316




Epoch 10/10, Train Loss: 0.2345, Train Acc: 0.9212, Val Loss: 0.2031, Val Acc: 0.9194


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▅▇▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▄▁▅▆▆▇█▆▇▇
val_loss,▇█▅▃▃▃▁▄▁▃

0,1
epoch,9.0
train_accuracy,0.92125
train_loss,0.23452
val_accuracy,0.91941
val_loss,0.20315


[34m[1mwandb[0m: Agent Starting Run: 2lrjwwol with config:
[34m[1mwandb[0m: 	activations: ['leaky_relu', 'leaky_relu', 'leaky_relu']
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_channels: [32, 64, 128]
[34m[1mwandb[0m: 	dropout_rates: [0.3]
[34m[1mwandb[0m: 	fc_sizes: [128, 64]
[34m[1mwandb[0m: 	learning_rate: 0.0026404616736181987
[34m[1mwandb[0m: 	optimizer: adamw
[34m[1mwandb[0m: 	use_batch_norm: True




Epoch 1/10, Train Loss: 0.3580, Train Acc: 0.8611, Val Loss: 0.2910, Val Acc: 0.8950




Epoch 2/10, Train Loss: 0.2863, Train Acc: 0.8971, Val Loss: 0.2019, Val Acc: 0.9243




Epoch 3/10, Train Loss: 0.2466, Train Acc: 0.9060, Val Loss: 0.1592, Val Acc: 0.9426




Epoch 4/10, Train Loss: 0.2362, Train Acc: 0.9118, Val Loss: 0.1589, Val Acc: 0.9426




Epoch 5/10, Train Loss: 0.2208, Train Acc: 0.9185, Val Loss: 0.1955, Val Acc: 0.9133




Epoch 6/10, Train Loss: 0.2210, Train Acc: 0.9212, Val Loss: 0.1362, Val Acc: 0.9499




Epoch 7/10, Train Loss: 0.1986, Train Acc: 0.9203, Val Loss: 0.2041, Val Acc: 0.9341




Epoch 8/10, Train Loss: 0.1839, Train Acc: 0.9374, Val Loss: 0.1411, Val Acc: 0.9426




Epoch 9/10, Train Loss: 0.1700, Train Acc: 0.9396, Val Loss: 0.1487, Val Acc: 0.9463




Epoch 10/10, Train Loss: 0.1740, Train Acc: 0.9371, Val Loss: 0.1540, Val Acc: 0.9438


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▆▆███
train_loss,█▅▄▃▃▃▂▂▁▁
val_accuracy,▁▅▇▇▃█▆▇█▇
val_loss,█▄▂▂▄▁▄▁▂▂

0,1
epoch,9.0
train_accuracy,0.93712
train_loss,0.17402
val_accuracy,0.94383
val_loss,0.15399


[34m[1mwandb[0m: Agent Starting Run: 04i9d71u with config:
[34m[1mwandb[0m: 	activations: ['relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_channels: [16, 32, 64]
[34m[1mwandb[0m: 	dropout_rates: [0]
[34m[1mwandb[0m: 	fc_sizes: [256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.006549785684219953
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	use_batch_norm: False




Epoch 1/10, Train Loss: 0.4844, Train Acc: 0.7790, Val Loss: 0.3292, Val Acc: 0.8864




Epoch 2/10, Train Loss: 0.3371, Train Acc: 0.8730, Val Loss: 0.3122, Val Acc: 0.8974




Epoch 3/10, Train Loss: 0.3066, Train Acc: 0.8834, Val Loss: 0.3045, Val Acc: 0.8999




Epoch 4/10, Train Loss: 0.2983, Train Acc: 0.8871, Val Loss: 0.2556, Val Acc: 0.9023




Epoch 5/10, Train Loss: 0.3029, Train Acc: 0.8868, Val Loss: 0.2713, Val Acc: 0.8877




Epoch 6/10, Train Loss: 0.2457, Train Acc: 0.9106, Val Loss: 0.2404, Val Acc: 0.9072




Epoch 7/10, Train Loss: 0.2279, Train Acc: 0.9109, Val Loss: 0.2181, Val Acc: 0.9182




Epoch 8/10, Train Loss: 0.2175, Train Acc: 0.9170, Val Loss: 0.2712, Val Acc: 0.9072




Epoch 9/10, Train Loss: 0.2274, Train Acc: 0.9118, Val Loss: 0.2514, Val Acc: 0.8926




Epoch 10/10, Train Loss: 0.2227, Train Acc: 0.9158, Val Loss: 0.1977, Val Acc: 0.9231


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▆▆▆█████
train_loss,█▄▃▃▃▂▁▁▁▁
val_accuracy,▁▃▄▄▁▅▇▅▂█
val_loss,█▇▇▄▅▃▂▅▄▁

0,1
epoch,9.0
train_accuracy,0.91575
train_loss,0.22274
val_accuracy,0.92308
val_loss,0.19769


[34m[1mwandb[0m: Agent Starting Run: wd1yuuwm with config:
[34m[1mwandb[0m: 	activations: ['relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_channels: [16, 32, 64]
[34m[1mwandb[0m: 	dropout_rates: [0.3]
[34m[1mwandb[0m: 	fc_sizes: [256]
[34m[1mwandb[0m: 	learning_rate: 0.0023575911437658404
[34m[1mwandb[0m: 	optimizer: adamw
[34m[1mwandb[0m: 	use_batch_norm: False




Epoch 1/10, Train Loss: 0.3533, Train Acc: 0.8565, Val Loss: 0.2599, Val Acc: 0.9084




Epoch 2/10, Train Loss: 0.2316, Train Acc: 0.9170, Val Loss: 0.1794, Val Acc: 0.9304




Epoch 3/10, Train Loss: 0.2193, Train Acc: 0.9200, Val Loss: 0.1955, Val Acc: 0.9341




Epoch 4/10, Train Loss: 0.2127, Train Acc: 0.9249, Val Loss: 0.1737, Val Acc: 0.9316




Epoch 5/10, Train Loss: 0.1882, Train Acc: 0.9301, Val Loss: 0.1449, Val Acc: 0.9438




Epoch 6/10, Train Loss: 0.1866, Train Acc: 0.9301, Val Loss: 0.1952, Val Acc: 0.9231




Epoch 7/10, Train Loss: 0.1744, Train Acc: 0.9393, Val Loss: 0.1089, Val Acc: 0.9560




Epoch 8/10, Train Loss: 0.1550, Train Acc: 0.9444, Val Loss: 0.2904, Val Acc: 0.8803




Epoch 9/10, Train Loss: 0.1699, Train Acc: 0.9402, Val Loss: 0.1572, Val Acc: 0.9451




Epoch 10/10, Train Loss: 0.1457, Train Acc: 0.9490, Val Loss: 0.1862, Val Acc: 0.9182


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▆▆▇▇▇█▇█
train_loss,█▄▃▃▂▂▂▁▂▁
val_accuracy,▄▆▆▆▇▅█▁▇▅
val_loss,▇▄▄▃▂▄▁█▃▄

0,1
epoch,9.0
train_accuracy,0.94902
train_loss,0.14574
val_accuracy,0.91819
val_loss,0.18624


[34m[1mwandb[0m: Agent Starting Run: gs67mx6o with config:
[34m[1mwandb[0m: 	activations: ['relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	conv_channels: [32, 64, 128]
[34m[1mwandb[0m: 	dropout_rates: [0.5]
[34m[1mwandb[0m: 	fc_sizes: [256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.00884007052179035
[34m[1mwandb[0m: 	optimizer: adamw
[34m[1mwandb[0m: 	use_batch_norm: False




Epoch 1/10, Train Loss: 0.9137, Train Acc: 0.5082, Val Loss: 0.6935, Val Acc: 0.5055




Epoch 2/10, Train Loss: 0.6918, Train Acc: 0.5357, Val Loss: 0.6943, Val Acc: 0.5055




Epoch 3/10, Train Loss: 0.6911, Train Acc: 0.5345, Val Loss: 0.6945, Val Acc: 0.5055




Epoch 4/10, Train Loss: 0.6903, Train Acc: 0.5345, Val Loss: 0.6947, Val Acc: 0.5055




Epoch 5/10, Train Loss: 0.6921, Train Acc: 0.5345, Val Loss: 0.6936, Val Acc: 0.5055




Epoch 6/10, Train Loss: 0.6908, Train Acc: 0.5345, Val Loss: 0.6947, Val Acc: 0.5055




Epoch 7/10, Train Loss: 0.6909, Train Acc: 0.5345, Val Loss: 0.6946, Val Acc: 0.5055




Epoch 8/10, Train Loss: 0.6909, Train Acc: 0.5345, Val Loss: 0.6939, Val Acc: 0.5055




Epoch 9/10, Train Loss: 0.6909, Train Acc: 0.5345, Val Loss: 0.6947, Val Acc: 0.5055




Epoch 10/10, Train Loss: 0.6908, Train Acc: 0.5345, Val Loss: 0.6945, Val Acc: 0.5055


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁█████████
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▆▆█▂█▇▄█▇

0,1
epoch,9.0
train_accuracy,0.53449
train_loss,0.69085
val_accuracy,0.50549
val_loss,0.6945


[34m[1mwandb[0m: Agent Starting Run: yu060go5 with config:
[34m[1mwandb[0m: 	activations: ['leaky_relu', 'leaky_relu', 'leaky_relu']
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_channels: [16, 32, 64, 128]
[34m[1mwandb[0m: 	dropout_rates: [0]
[34m[1mwandb[0m: 	fc_sizes: [128]
[34m[1mwandb[0m: 	learning_rate: 0.0056988487949122205
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	use_batch_norm: True




Epoch 1/10, Train Loss: 0.4615, Train Acc: 0.8541, Val Loss: 0.2659, Val Acc: 0.8987




Epoch 2/10, Train Loss: 0.2749, Train Acc: 0.8987, Val Loss: 0.2780, Val Acc: 0.8950




Epoch 3/10, Train Loss: 0.2415, Train Acc: 0.9081, Val Loss: 0.2312, Val Acc: 0.9243




Epoch 4/10, Train Loss: 0.2286, Train Acc: 0.9093, Val Loss: 0.1910, Val Acc: 0.9365




Epoch 5/10, Train Loss: 0.2011, Train Acc: 0.9286, Val Loss: 0.1921, Val Acc: 0.9231




Epoch 6/10, Train Loss: 0.1921, Train Acc: 0.9258, Val Loss: 0.2217, Val Acc: 0.9280




Epoch 7/10, Train Loss: 0.1826, Train Acc: 0.9325, Val Loss: 0.1398, Val Acc: 0.9512




Epoch 8/10, Train Loss: 0.1588, Train Acc: 0.9423, Val Loss: 0.1558, Val Acc: 0.9512




Epoch 9/10, Train Loss: 0.1434, Train Acc: 0.9466, Val Loss: 0.1390, Val Acc: 0.9524




Epoch 10/10, Train Loss: 0.1437, Train Acc: 0.9457, Val Loss: 0.1599, Val Acc: 0.9475


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▅▇▆▇███
train_loss,█▄▃▃▂▂▂▁▁▁
val_accuracy,▁▁▅▆▄▅███▇
val_loss,▇█▆▄▄▅▁▂▁▂

0,1
epoch,9.0
train_accuracy,0.94567
train_loss,0.14372
val_accuracy,0.9475
val_loss,0.1599


[34m[1mwandb[0m: Agent Starting Run: bdk9h1w6 with config:
[34m[1mwandb[0m: 	activations: ['leaky_relu', 'leaky_relu', 'leaky_relu']
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_channels: [32, 64, 128]
[34m[1mwandb[0m: 	dropout_rates: [0]
[34m[1mwandb[0m: 	fc_sizes: [256]
[34m[1mwandb[0m: 	learning_rate: 0.004040960480563529
[34m[1mwandb[0m: 	optimizer: adamw
[34m[1mwandb[0m: 	use_batch_norm: False




Epoch 1/10, Train Loss: 0.4403, Train Acc: 0.8327, Val Loss: 0.2263, Val Acc: 0.9121




Epoch 2/10, Train Loss: 0.2629, Train Acc: 0.9045, Val Loss: 0.2926, Val Acc: 0.8938




Epoch 3/10, Train Loss: 0.2165, Train Acc: 0.9182, Val Loss: 0.2206, Val Acc: 0.9280




Epoch 4/10, Train Loss: 0.2156, Train Acc: 0.9179, Val Loss: 0.3841, Val Acc: 0.8596




Epoch 5/10, Train Loss: 0.2055, Train Acc: 0.9203, Val Loss: 0.1858, Val Acc: 0.9243




Epoch 6/10, Train Loss: 0.2288, Train Acc: 0.9121, Val Loss: 0.1688, Val Acc: 0.9414




Epoch 7/10, Train Loss: 0.1739, Train Acc: 0.9313, Val Loss: 0.1342, Val Acc: 0.9463




Epoch 8/10, Train Loss: 0.1778, Train Acc: 0.9350, Val Loss: 0.1847, Val Acc: 0.9304




Epoch 9/10, Train Loss: 0.2362, Train Acc: 0.9154, Val Loss: 0.1535, Val Acc: 0.9414




Epoch 10/10, Train Loss: 0.1674, Train Acc: 0.9393, Val Loss: 0.2719, Val Acc: 0.9060


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇▇▆▇█▆█
train_loss,█▃▂▂▂▃▁▁▃▁
val_accuracy,▅▄▇▁▆██▇█▅
val_loss,▄▅▃█▂▂▁▂▂▅

0,1
epoch,9.0
train_accuracy,0.93926
train_loss,0.16735
val_accuracy,0.90598
val_loss,0.27186


[34m[1mwandb[0m: Agent Starting Run: zjbwu04z with config:
[34m[1mwandb[0m: 	activations: ['relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_channels: [32, 64, 128]
[34m[1mwandb[0m: 	dropout_rates: [0]
[34m[1mwandb[0m: 	fc_sizes: [256]
[34m[1mwandb[0m: 	learning_rate: 0.004531057092431919
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	use_batch_norm: False




Epoch 1/10, Train Loss: 0.3714, Train Acc: 0.8513, Val Loss: 0.2427, Val Acc: 0.8999




Epoch 2/10, Train Loss: 0.2434, Train Acc: 0.9042, Val Loss: 0.2034, Val Acc: 0.9182




Epoch 3/10, Train Loss: 0.2284, Train Acc: 0.9161, Val Loss: 0.2099, Val Acc: 0.9158




Epoch 4/10, Train Loss: 0.2034, Train Acc: 0.9216, Val Loss: 0.2625, Val Acc: 0.9084




Epoch 5/10, Train Loss: 0.2377, Train Acc: 0.9127, Val Loss: 0.1941, Val Acc: 0.9304




Epoch 6/10, Train Loss: 0.1909, Train Acc: 0.9277, Val Loss: 0.1906, Val Acc: 0.9377




Epoch 7/10, Train Loss: 0.2050, Train Acc: 0.9209, Val Loss: 0.1687, Val Acc: 0.9304




Epoch 8/10, Train Loss: 0.1713, Train Acc: 0.9313, Val Loss: 0.1675, Val Acc: 0.9304




Epoch 9/10, Train Loss: 0.1736, Train Acc: 0.9328, Val Loss: 0.1753, Val Acc: 0.9353




Epoch 10/10, Train Loss: 0.1647, Train Acc: 0.9307, Val Loss: 0.1536, Val Acc: 0.9426


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇▆█▇███
train_loss,█▄▃▂▃▂▂▁▁▁
val_accuracy,▁▄▄▂▆▇▆▆▇█
val_loss,▇▄▅█▄▃▂▂▂▁

0,1
epoch,9.0
train_accuracy,0.93071
train_loss,0.16467
val_accuracy,0.94261
val_loss,0.15362


[34m[1mwandb[0m: Agent Starting Run: ujmtjct5 with config:
[34m[1mwandb[0m: 	activations: ['leaky_relu', 'leaky_relu', 'leaky_relu']
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_channels: [32, 64, 128]
[34m[1mwandb[0m: 	dropout_rates: [0]
[34m[1mwandb[0m: 	fc_sizes: [128, 64]
[34m[1mwandb[0m: 	learning_rate: 0.001488867856424226
[34m[1mwandb[0m: 	optimizer: adamw
[34m[1mwandb[0m: 	use_batch_norm: False




Epoch 1/10, Train Loss: 0.3530, Train Acc: 0.8657, Val Loss: 0.1992, Val Acc: 0.9206




Epoch 2/10, Train Loss: 0.2344, Train Acc: 0.9167, Val Loss: 0.2262, Val Acc: 0.9109




Epoch 3/10, Train Loss: 0.2144, Train Acc: 0.9261, Val Loss: 0.1990, Val Acc: 0.9158




Epoch 4/10, Train Loss: 0.1791, Train Acc: 0.9310, Val Loss: 0.1392, Val Acc: 0.9499




Epoch 5/10, Train Loss: 0.1739, Train Acc: 0.9325, Val Loss: 0.1201, Val Acc: 0.9597




Epoch 6/10, Train Loss: 0.1580, Train Acc: 0.9444, Val Loss: 0.1336, Val Acc: 0.9585




Epoch 7/10, Train Loss: 0.1338, Train Acc: 0.9502, Val Loss: 0.1084, Val Acc: 0.9609




Epoch 8/10, Train Loss: 0.1288, Train Acc: 0.9512, Val Loss: 0.1157, Val Acc: 0.9487




Epoch 9/10, Train Loss: 0.1084, Train Acc: 0.9615, Val Loss: 0.0843, Val Acc: 0.9670




Epoch 10/10, Train Loss: 0.0950, Train Acc: 0.9634, Val Loss: 0.1086, Val Acc: 0.9658


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▅▆▆▇▇▇██
train_loss,█▅▄▃▃▃▂▂▁▁
val_accuracy,▂▁▂▆▇▇▇▆██
val_loss,▇█▇▄▃▃▂▃▁▂

0,1
epoch,9.0
train_accuracy,0.96337
train_loss,0.09501
val_accuracy,0.96581
val_loss,0.10865


[34m[1mwandb[0m: Agent Starting Run: fvirl0ih with config:
[34m[1mwandb[0m: 	activations: ['leaky_relu', 'leaky_relu', 'leaky_relu']
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_channels: [16, 32, 64]
[34m[1mwandb[0m: 	dropout_rates: [0]
[34m[1mwandb[0m: 	fc_sizes: [256]
[34m[1mwandb[0m: 	learning_rate: 0.0011602096491267796
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	use_batch_norm: True




Epoch 1/10, Train Loss: 0.3056, Train Acc: 0.8761, Val Loss: 0.3049, Val Acc: 0.8828




Epoch 2/10, Train Loss: 0.2215, Train Acc: 0.9167, Val Loss: 0.2034, Val Acc: 0.9255




Epoch 3/10, Train Loss: 0.2034, Train Acc: 0.9203, Val Loss: 0.1539, Val Acc: 0.9353




Epoch 4/10, Train Loss: 0.1682, Train Acc: 0.9371, Val Loss: 0.1301, Val Acc: 0.9597




Epoch 5/10, Train Loss: 0.1452, Train Acc: 0.9454, Val Loss: 0.1035, Val Acc: 0.9658




Epoch 6/10, Train Loss: 0.1331, Train Acc: 0.9509, Val Loss: 0.1080, Val Acc: 0.9573




Epoch 7/10, Train Loss: 0.1287, Train Acc: 0.9515, Val Loss: 0.1546, Val Acc: 0.9463




Epoch 8/10, Train Loss: 0.1257, Train Acc: 0.9551, Val Loss: 0.0937, Val Acc: 0.9707




Epoch 9/10, Train Loss: 0.1072, Train Acc: 0.9600, Val Loss: 0.1092, Val Acc: 0.9634




Epoch 10/10, Train Loss: 0.1071, Train Acc: 0.9557, Val Loss: 0.1057, Val Acc: 0.9585


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▂▁▁
val_accuracy,▁▄▅▇█▇▆█▇▇
val_loss,█▅▃▂▁▁▃▁▂▁

0,1
epoch,9.0
train_accuracy,0.95574
train_loss,0.10706
val_accuracy,0.95849
val_loss,0.10568


[34m[1mwandb[0m: Agent Starting Run: a9pcp0oc with config:
[34m[1mwandb[0m: 	activations: ['relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_channels: [16, 32, 64, 128]
[34m[1mwandb[0m: 	dropout_rates: [0.3]
[34m[1mwandb[0m: 	fc_sizes: [128]
[34m[1mwandb[0m: 	learning_rate: 0.005529335097769525
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	use_batch_norm: False




Epoch 1/10, Train Loss: 0.4475, Train Acc: 0.8059, Val Loss: 0.2680, Val Acc: 0.8999




Epoch 2/10, Train Loss: 0.2985, Train Acc: 0.8895, Val Loss: 0.2700, Val Acc: 0.8864




Epoch 3/10, Train Loss: 0.2424, Train Acc: 0.9087, Val Loss: 0.2260, Val Acc: 0.9023




Epoch 4/10, Train Loss: 0.2244, Train Acc: 0.9197, Val Loss: 0.1989, Val Acc: 0.9316




Epoch 5/10, Train Loss: 0.2049, Train Acc: 0.9243, Val Loss: 0.2350, Val Acc: 0.9035




Epoch 6/10, Train Loss: 0.1864, Train Acc: 0.9332, Val Loss: 0.1832, Val Acc: 0.9365




Epoch 7/10, Train Loss: 0.1950, Train Acc: 0.9304, Val Loss: 0.1831, Val Acc: 0.9341




Epoch 8/10, Train Loss: 0.1927, Train Acc: 0.9270, Val Loss: 0.1523, Val Acc: 0.9438




Epoch 9/10, Train Loss: 0.1689, Train Acc: 0.9350, Val Loss: 0.1287, Val Acc: 0.9487




Epoch 10/10, Train Loss: 0.1433, Train Acc: 0.9447, Val Loss: 0.1282, Val Acc: 0.9463


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇▇▇██
train_loss,█▅▃▃▂▂▂▂▂▁
val_accuracy,▃▁▃▆▃▇▆▇██
val_loss,██▆▄▆▄▄▂▁▁

0,1
epoch,9.0
train_accuracy,0.94475
train_loss,0.14331
val_accuracy,0.94628
val_loss,0.12822


[34m[1mwandb[0m: Agent Starting Run: 3ti8y94v with config:
[34m[1mwandb[0m: 	activations: ['relu', 'relu', 'relu']
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_channels: [16, 32, 64]
[34m[1mwandb[0m: 	dropout_rates: [0]
[34m[1mwandb[0m: 	fc_sizes: [128, 64]
[34m[1mwandb[0m: 	learning_rate: 0.0008802964311920451
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	use_batch_norm: True


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x787be107ef90>> (for post_run_cell), with arguments args (<ExecutionResult object at 787be323a990, execution_count=7 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 787be323a8a0, raw_cell="# Initialize a sweep
sweep_id = wandb.sweep(sweep_.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell:/home/ritish/prog/college/VR/mini_project_1/face_mask_classifier.ipynb#X15sZmlsZQ%3D%3D> result=None>,),kwargs {}:


Exception in thread Thread-47 (_run_job):


BrokenPipeError: [Errno 32] Broken pipe

Traceback (most recent call last):


  File [35m"/home/ritish/prog/ML/.venv/lib/python3.13/site-packages/ipykernel/iostream.py"[0m, line [35m509[0m, in [35mparent_header[0m
    return [31mself._parent_header.get[0m[1;31m()[0m
           [31m~~~~~~~~~~~~~~~~~~~~~~~[0m[1;31m^^[0m


In [9]:
# Test accuracy on validation set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Validation accuracy: {correct / total}')

Validation accuracy: 0.8974358974358975
