In [1]:
import wandb
# Log in to Weights & Biases before any sweep or run is created.
wandb.login()


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtinkerfrank[0m ([33mteamnan[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:
%env "WANDB_NOTEBOOK_NAME" "Apple_CNN_Sweep.ipnyb"

env: "WANDB_NOTEBOOK_NAME"="Apple_CNN_Sweep.ipnyb"


In [3]:
# Root folder of the training images; build_dataset passes it to ImageFolder.
dataset_path = "./apple_disease_classification/Train"

In [4]:
# Sweep definition: random search that minimizes the metric logged as 'loss'.
metric = dict(name='loss', goal='minimize')
sweep_config = dict(method='random', metric=metric)

In [5]:
# Discrete hyperparameter choices the sweep samples from.
parameters_dict = dict(
    # Keep the optimizer names lowercase, as build_optimizer matches on them;
    # 'Adam' gave: AttributeError: "'str' object has no attribute 'zero_grad'"
    optimizer=dict(values=['sgd', 'adam']),
    dropout=dict(values=[0.2, 0.4, 0.5]),
    conv_depth_size=dict(values=[6, 8]),
    fc_layer_size=dict(values=[128, 256]),
)

# Attach the search space to the sweep definition.
sweep_config['parameters'] = parameters_dict

In [6]:
# Extend the search space with fixed settings and sampled ranges.
parameters_dict.update(
    # constants shared by every run
    resize={'value': 128},
    epochs={'value': 35},
    # a flat (uniform) distribution between 0.01 and 0.1
    learning_rate={'distribution': 'uniform', 'min': 0.01, 'max': 0.1},
    # values between 32 and 64, quantized to multiples of 8,
    # with evenly-distributed logarithms
    batch_size={'distribution': 'q_log_uniform_values', 'q': 8, 'min': 32, 'max': 64},
)

In [7]:
import pprint

# Show the assembled sweep configuration for a visual check.
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'batch_size': {'distribution': 'q_log_uniform_values',
                               'max': 64,
                               'min': 32,
                               'q': 8},
                'conv_depth_size': {'values': [6, 8]},
                'dropout': {'values': [0.2, 0.4, 0.5]},
                'epochs': {'value': 35},
                'fc_layer_size': {'values': [128, 256]},
                'learning_rate': {'distribution': 'uniform',
                                  'max': 0.1,
                                  'min': 0.01},
                'optimizer': {'values': ['sgd', 'adam']},
                'resize': {'value': 128}}}


In [8]:
# Create the sweep in the "Apple_Sweep_Test" project; the returned id is later handed to wandb.agent.
sweep_id = wandb.sweep(sweep_config, project="Apple_Sweep_Test")

Create sweep with ID: buhsbvwq
Sweep URL: https://wandb.ai/teamnan/Apple_Sweep_Test/sweeps/buhsbvwq


In [9]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder 
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import ToTensor

import os

import matplotlib.pyplot as plt
%matplotlib inline 
import gc


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def train(config=None):
    """Run one complete training job inside a wandb run.

    Builds the data loader, the CNN and the optimizer from the run's
    configuration, then trains for ``config.epochs`` epochs and logs the
    per-epoch loss and accuracy.

    Args:
        config: Optional hyperparameter defaults forwarded to ``wandb.init``.
            When the function is launched by ``wandb.agent`` the values are
            supplied by the sweep controller instead.
    """
    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config

        loader = build_dataset(config.batch_size, config.resize)
        network = CNN(config.conv_depth_size, config.fc_layer_size, config.dropout).to(device)
        optimizer = build_optimizer(network, config.optimizer, config.learning_rate)
        print(optimizer)

        for epoch in range(config.epochs):
            # train_epoch returns a (loss, accuracy) pair for the epoch.
            epoch_loss, epoch_acc = train_epoch(network, loader, optimizer)
            # "loss" is the metric name the sweep is configured to minimize.
            wandb.log({"loss": epoch_loss, "epoch": epoch, "acc": epoch_acc})

In [10]:
def build_dataset(batch_size, resize):
    """Prepare the images on disk and return a shuffled training DataLoader.

    Every ``.jpg``/``.jpeg`` file under ``./apple_disease_classification/``
    whose size differs from ``(resize, resize)`` is converted to RGB, resized
    and overwritten in place. For each such source image a vertically flipped
    (``*_flipped``) and a horizontally mirrored (``*_mirrored``) copy is written
    next to it as a quick augmentation. Files that are themselves augmented
    copies are resized but never augmented again, so changing ``resize``
    between runs does not multiply the dataset.

    Args:
        batch_size: Number of samples per batch.
        resize: Side length in pixels of the square images.

    Returns:
        A ``DataLoader`` over ``ImageFolder(dataset_path)`` with shuffling.
    """
    from PIL import Image, ImageOps

    augmented_suffixes = ('_flipped', '_mirrored')
    target_size = (resize, resize)

    for subdir, dirs, files in os.walk('./apple_disease_classification/'):
        for file in files:
            filepath = os.path.join(subdir, file)
            name, extension = os.path.splitext(filepath)

            # to-do pre-check for all image filetypes
            if extension.lower() not in ('.jpg', '.jpeg'):
                continue

            # NOTE: one unparseable jpg was deleted from the dataset by hand;
            # blotch #17 is a png instead of a jpg - look into it later.
            # The context manager closes the file handle before overwriting.
            with Image.open(filepath) as img:
                if img.size == target_size:
                    continue
                rgb_im_resized = img.convert('RGB').resize(target_size)
            rgb_im_resized.save(filepath)

            # Do not augment files that are already augmented copies.
            if name.endswith(augmented_suffixes):
                continue

            ## Quick way to augment dataset by flipping and mirroring
            ImageOps.flip(rgb_im_resized).save(name + '_flipped' + extension)
            ImageOps.mirror(rgb_im_resized).save(name + '_mirrored' + extension)

    # Transform to tensors
    transform = ToTensor()
    dataset = ImageFolder(dataset_path, transform=transform)

    trainloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=2)

    return trainloader

class CNN(nn.Module):
    """Small convolutional classifier for square RGB images.

    Layout: conv(5x5) -> ReLU -> maxpool(2) -> conv(5x5) -> ReLU -> maxpool(2)
    -> fc1 -> ReLU -> dropout -> fc2 -> ReLU -> fc3 (logits).

    Args:
        conv_depth_size: Number of output channels of the first convolution.
        fc_layer_size: Width of the first fully connected layer.
        dropout: Drop probability applied after the first fully connected layer.
        input_size: Side length in pixels of the square input images. The
            default of 128 yields the 16 * 29 * 29 flattened features.
        num_classes: Number of output logits.

    Raises:
        ValueError: If ``input_size`` is too small for the two conv/pool stages.
    """

    def __init__(self, conv_depth_size, fc_layer_size, dropout, input_size=128, num_classes=4):
        super().__init__()
        kernel_size, pool_size = 5, 2

        # Spatial side after each (valid conv -> pool) stage,
        # e.g. 128 -> (128-5+1)//2 = 62 -> (62-5+1)//2 = 29.
        side = input_size
        for _ in range(2):
            side = (side - kernel_size + 1) // pool_size
        if side <= 0:
            raise ValueError(f"input_size={input_size} is too small for this network")
        self.flat_features = 16 * side * side

        self.conv1 = nn.Conv2d(3, conv_depth_size, kernel_size)  # RGB = 3 input channels
        self.conv2 = nn.Conv2d(conv_depth_size, 16, kernel_size)
        self.pool = nn.MaxPool2d(pool_size, pool_size)
        self.fc1 = nn.Linear(self.flat_features, fc_layer_size)
        self.fc2 = nn.Linear(fc_layer_size, 84)
        self.fc3 = nn.Linear(84, num_classes)
        self.drop1 = nn.Dropout(dropout)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for an image batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything but the batch dimension; a wrongly sized input now
        # fails at fc1 with a shape error instead of silently mixing samples.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.drop1(x)
        x = F.relu(self.fc2(x))
        # The output already lives on the module's device; no extra transfer needed.
        return self.fc3(x)

def build_optimizer(network, optimizer, learning_rate):
    """Create the optimizer named by ``optimizer`` for ``network``'s parameters.

    Args:
        network: Module whose parameters are optimized.
        optimizer: ``"sgd"`` or ``"adam"`` (case-insensitive). An already
            constructed ``torch.optim.Optimizer`` is returned unchanged.
        learning_rate: Learning rate passed to the optimizer.

    Returns:
        ``optim.SGD`` (momentum 0.9) or ``optim.Adam``.

    Raises:
        ValueError: If the name is not recognized. Previously the raw string
            was returned and training failed later with
            ``'str' object has no attribute 'zero_grad'``.
    """
    if isinstance(optimizer, optim.Optimizer):
        return optimizer

    name = str(optimizer).lower()
    if name == "sgd":
        return optim.SGD(network.parameters(), lr=learning_rate, momentum=0.9)
    if name == "adam":
        return optim.Adam(network.parameters(), lr=learning_rate)
    raise ValueError(f"Unsupported optimizer {optimizer!r}; expected 'sgd' or 'adam'")


def train_epoch(network, loader, optimizer):
    """Train ``network`` for one pass over ``loader``.

    Logs every batch loss to wandb under ``"batch loss"``.

    Args:
        network: Model to train; expected to already be on ``device``.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimizer updating ``network``'s parameters.

    Returns:
        tuple: ``(mean batch loss, accuracy)``, where accuracy is the fraction
        of samples seen this epoch that were classified correctly.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    # You can not use the class CrossEntropyLoss directly. You should instantiate this class before using it.
    CELoss = nn.CrossEntropyLoss()
    network.train()  # make sure dropout is active during training

    cumu_loss = 0.0
    train_running_correct = 0
    samples_seen = 0
    num_batches = 0

    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # clear previous calculated gradients
        optimizer.zero_grad()

        # ➡ Forward pass (call the module, not .forward, so hooks run)
        outputs = network(inputs)
        loss = CELoss(outputs, labels)

        # ⬅ Backward pass + weight update
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        cumu_loss += batch_loss

        # calculate the accuracy
        _, preds = torch.max(outputs.detach(), 1)
        train_running_correct += (preds == labels).sum().item()
        samples_seen += labels.size(0)
        num_batches += 1

        wandb.log({"batch loss": batch_loss})

    if num_batches == 0:
        raise ValueError("loader yielded no batches; cannot compute epoch metrics")

    # memory management: free up space once per epoch rather than on every
    # batch, where it only slows training down
    gc.collect()
    torch.cuda.empty_cache()

    # Accuracy is normalized by the sample count, not the batch count.
    return cumu_loss / num_batches, train_running_correct / samples_seen

In [11]:
# Start a sweep agent that calls `train` for up to 5 runs of the sweep created above.
wandb.agent(sweep_id, train, count=5)

[34m[1mwandb[0m: Agent Starting Run: 0jr6n77r with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_depth_size: 8
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 35
[34m[1mwandb[0m: 	fc_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.06907450510983473
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	resize: 128
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.06907450510983473
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.170712…

0,1
batch loss,█▇▇█▇▇▇█████▇▇▆▆▇▇▅▇█▅▇▅▅▄▄▅▃▅▃▃▇▃▁▁▄█▃▁
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
loss,█████▇█▇███▇▆▆▆▆▅▆▅▅▄▃▄▄▄▃▃▃▃▂▂▂▂▃▁

0,1
batch loss,0.289
epoch,34.0
loss,0.49637


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k3oa9odd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_depth_size: 6
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 35
[34m[1mwandb[0m: 	fc_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.027184221691488437
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	resize: 128
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.027184221691488437
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
batch loss,▆▆▅▄█▃▃▃▂▃▁▆▂▇▆▆▅▆▇▆▅▆▅▆▆▆▅▆▅▆▆▇▆▆▇▆▆▆▆▅
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
loss,▇▆▅▅▃▂▁▁▂▃▅█▇▇▇▇▇▇▇▇▇▇▆▆▇▇▇▇▇▇▇▇▇▇▇

0,1
batch loss,1.32378
epoch,34.0
loss,1.36405


[34m[1mwandb[0m: Agent Starting Run: 3p96ncgc with config:
[34m[1mwandb[0m: 	batch_size: 48
[34m[1mwandb[0m: 	conv_depth_size: 6
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 35
[34m[1mwandb[0m: 	fc_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.03431393259829446
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	resize: 128
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.03431393259829446
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
batch loss,▇▇▇▇▆█▇▅▇▅▅▇▆▆▆▅▅▆▅▅▄▅▄▃▃▅▃▂▄▁▂▁▂▁▃▂▁▂▁▁
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
loss,███▇▇▇▇▇▇▇▇▇▆▆▅▅▅▄▅▄▄▄▃▃▃▂▂▂▁▂▂▁▂▁▂

0,1
batch loss,0.23227
epoch,34.0
loss,0.39302


[34m[1mwandb[0m: Agent Starting Run: jju1rk3o with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_depth_size: 8
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 35
[34m[1mwandb[0m: 	fc_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.09570614543467898
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	resize: 128
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.09570614543467898
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)


0,1
batch loss,▅▆▆▁█▇▇▆▆▅▃▆▇▆▆▆▇▆▅▆▆▅▆▅▆▅▆▆▅▆▆▆▆▆▇▇▅▇▆▅
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
loss,▆▄▂▅▆▅▅▅▁█▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅

0,1
batch loss,1.32732
epoch,34.0
loss,1.36505


[34m[1mwandb[0m: Agent Starting Run: o7fsm91i with config:
[34m[1mwandb[0m: 	batch_size: 40
[34m[1mwandb[0m: 	conv_depth_size: 8
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 35
[34m[1mwandb[0m: 	fc_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.034399840907787284
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	resize: 128
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.034399840907787284
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
batch loss,██▇██▇█▇▇▆▆▆▅▆▇▆▆▅▅▅▅▄▅▄▃▃▂▂▂▂▂▂▂▃▂▁▁▁▂▁
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
loss,███████▇▇▇▆▇▇▆▆▆▅▅▄▄▄▃▃▂▃▃▃▂▂▃▂▂▂▁▁

0,1
batch loss,0.11025
epoch,34.0
loss,0.09431
