In [1]:
# HPS using wandb for cool graphs

Requirement already up-to-date: wandb in /its/home/nn268/.local/lib/python3.8/site-packages (0.15.10)


In [1]:
# Imports

import cv2
from PIL import Image

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import math as maths

import os
import random

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.nn import functional
#from torchsummary import summary
#import torchvision.transforms as transforms

from tqdm import tqdm
from IPython.display import clear_output

import wandb
import pprint

from functions import Unwrap, label_oh_tf, loop, import_imagedata, ImageProcessor, test_loop, get_data
from architectures import build_net



In [2]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mnaughticalnonsence[0m ([33mantvis[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

# Define Sweep

In [8]:
# define sweep in a config
# in notebook - nested dict
# in cmd - yaml

# grid search
# bayesian search
# random search

config ={
    'method': 'random'
}

metric = {'name': 'loss',
         'goal': 'minimize'}

config['metric'] = metric

param_dict ={
    'optimizer':{
        'values': ['adam']
    },
    'lin_layer_size': {
        'values': [100,150, 50]
    },
    'dropout': {
        'values': [0.3, 0.4, 0.5]
    },
    'first_linear':{
        'values': [14336]
    }
}

config['parameters'] = param_dict

param_dict.update({
    'epochs': {
        'value': 40
    }
})



param_dict.update({
    'learning_rate': {
        # a flat distribution between 0 and 0.1
        'distribution': 'log_uniform_values',
        'min': 0.0001,
        'max': 0.1
      },
    'weight_decay':{
            'values': [1e-5,2e-5, 3e-5,4e-5]
      }
    })



pprint.pprint(config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'dropout': {'values': [0.3, 0.4, 0.5]},
                'epochs': {'value': 40},
                'first_linear': {'values': [14336]},
                'learning_rate': {'distribution': 'log_uniform_values',
                                  'max': 0.1,
                                  'min': 0.0001},
                'lin_layer_size': {'values': [100, 150, 50]},
                'optimizer': {'values': ['adam']},
                'weight_decay': {'values': [1e-05, 2e-05, 3e-05, 4e-05]}}}


In [2]:

config = dict(
    epochs= 100, #model, config.optimizer, config.learning_rate, config.weight_decay
    learning_rate =3.08e-5,
    dataset= 'IDSW',
    architecture ='CNN',
    optimizer= 'adam',
    weight_decay= 4e-5,
    kernal_size =3,
    first_in_channel = 2
)


col_dict = {
    'colour': 'nored',
    'size': [32,32], #36, 113   
}

In [26]:
print(f"HPS_wrapped_{col_dict['colour']}")

HPS_wrapped_nored


# Init sweep

In [9]:
sweep_id = wandb.sweep(config, project=f"HPS_UNwrapped_{col_dict['colour']}")

device = "cuda:1" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Create sweep with ID: sdxj9fab
Sweep URL: https://wandb.ai/antvis/HPS_UNwrapped_nored/sweeps/sdxj9fab
Using cuda:1 device


In [None]:
""" if (batch_count +1)%25 ==0:
            train_log(t_loss,v_loss, sample_count, epoch)"""

In [11]:

                            # Common functions
x_train, y_train, x_val, y_val, x_test, y_test = get_data()

from torch.utils.data import DataLoader
train_loader = DataLoader(list(zip(x_train, y_train)), batch_size=4, shuffle=True)

#####
    
def build_optimizer(network, optimizer, learning_rate, weight_decay=0):
    if optimizer == 'SGD':
        optimizer = torch.optim.SGD(network.parameters(),
                              lr=learning_rate, momentum=0.9)
    elif optimizer == "adam":
        if weight_decay == 0:
            optimizer = torch.optim.Adam(network.parameters(),
                               lr=learning_rate)
        optimizer = torch.optim.Adam(network.parameters(),
                               lr=learning_rate, weight_decay=weight_decay)
    return optimizer

def train_log(t_loss, v_loss, sample_count, epoch):
    wandb.log({'epoch': epoch,
              't_loss': t_loss,
              'v_loss': v_loss},
             step=sample_count)
    print(f'loss after {str(sample_count).zfill(5)} examples: {v_loss:.3f}')

                                # HP Sweep
def train(config=None):
    with wandb.init(config=config):
        config = wandb.config

        model = build_net(config.lin_layer_size,config.dropout, config.first_lin_lay,ks= config.kernal_size,in_chan= config.first_in_channel, pad=False).to(device)
        loss_fn = nn.MSELoss()
        
        e_count = 0
        #optimizer = build_optimizer(network, config.optimizer, config.learning_rate, config.weight_decay)
        if e_count >= 20:
            optimizer = build_optimizer(model, config.optimizer, config.learning_rate, config.weight_decay)
        else:
            optimizer = build_optimizer(model, config.optimizer, config.learning_rate)
        
        for epoch in range(config.epochs):

            t_loss, predict_list, t_num_correct, model, optimizer= loop(model, x_train, y_train, epoch, loss_fn, device, col_dict, optimizer=optimizer)
            
            t_accuracy = (t_num_correct /len(x_train))*100
            
            v_loss, __, v_num_correct= loop(model, x_val, y_val, epoch, loss_fn, device,col_dict, train=False) 
            
            v_accuracy= (v_num_correct / len(x_val))*100
            
            t_avg_loss =t_loss/len(x_train)
            v_avg_loss = v_loss /len(x_val)
            
            e_count +=1
            
            wandb.log({'avg_train_loss': t_avg_loss, 'epoch':epoch})
            wandb.log({'avg_val_loss': v_avg_loss, 'epoch':epoch})
            wandb.log({'train_loss': t_loss, 'epoch':epoch})
            wandb.log({'val_loss': v_loss, 'epoch':epoch})
            wandb.log({'train_accuracy_%': t_accuracy})
            wandb.log({'val_accuracy_%': v_accuracy})

                                #Training
            
def train_model(model, x_train, y_train, x_val, y_val,loss_fn, config):
    wandb.watch(model, loss_fn, log='all', log_freq=10)
    
    sample_count =0
    #batch_count = 0
    e_count = 0
    
    for epoch in tqdm(range(config.epochs)):
        if e_count >= 20:
            optimizer = build_optimizer(model, config.optimizer, config.learning_rate, config.weight_decay)
        else:
            optimizer = build_optimizer(model, config.optimizer, config.learning_rate)
        for x_batch, y_batch in train_loader:
            #train
            t_loss, predict_list, t_num_correct, model, optimizer = loop(model, x_batch, y_batch, epoch, loss_fn, device, col_dict, optimizer=optimizer)
            sample_count += len(x_train)
            t_accuracy= (t_num_correct / len(x_train))*100

            # validation
            v_loss, __, v_num_correct= loop(model, x_val, y_val, epoch, loss_fn, device,col_dict, train=False) 
            v_accuracy= (v_num_correct / len(x_val))*100
            #batch_count +=1

            wandb.log({'train_loss': t_loss, 'epoch':epoch})
            wandb.log({'val_loss': v_loss, 'epoch':epoch})

            t_avg_loss =t_loss/len(x_train)
            v_avg_loss = v_loss /len(x_val)
            wandb.log({'avg_train_loss': t_avg_loss, 'epoch':epoch})
            wandb.log({'avg_val_loss': v_avg_loss, 'epoch':epoch})

            wandb.log({'train_accuracy_%': t_accuracy})
            wandb.log({'val_accuracy_%': v_accuracy})

            e_count +=1
            clear_output()


def pipeline(hp): 
    
    title = 'nored_Wrapped_3232'
    
    x_train, y_train, x_val, y_val, x_test, y_test = get_data()
    
    with wandb.init(project=title, config=hp):
        config = wandb.config
        model = build_net(lin_layer_size =100,dropout =0, first_lin_lay=4096,ks= config.kernal_size,in_chan= config.first_in_channel).to(device)
        loss_fn = nn.MSELoss()
        
        train_model(model, x_train, y_train, x_val, y_val,loss_fn, config)
        test_loop(model, x_test, y_test, loss_fn, device, col_dict,title,  WANDB=True, wandb=wandb)
        
    return model
        

In [7]:
print(len(x), len(y))
print(np.unique(y))

1034 1034
['0' '1' '10' '2' '3' '4' '5' '6' '7' '8' '9']


In [None]:
model = pipeline(config)

In [12]:
wandb.agent(sweep_id, train, count=25)

[34m[1mwandb[0m: Agent Starting Run: 1z4nh0u0 with config:
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	first_linear: 14336
[34m[1mwandb[0m: 	learning_rate: 0.0028233949008654433
[34m[1mwandb[0m: 	lin_layer_size: 150
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 3e-05


Traceback (most recent call last):
  File "/its/home/nn268/.local/lib/python3.8/site-packages/wandb/sdk/wandb_config.py", line 162, in __getattr__
    return self.__getitem__(key)
  File "/its/home/nn268/.local/lib/python3.8/site-packages/wandb/sdk/wandb_config.py", line 130, in __getitem__
    return self._items[key]
KeyError: 'first_lin_lay'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "<ipython-input-11-e0214b1a719c>", line 33, in train
    model = build_net(config.lin_layer_size,config.dropout, config.first_lin_lay,ks= config.kernal_size,in_chan= config.first_in_channel, pad=False).to(device)
  File "/its/home/nn268/.local/lib/python3.8/site-packages/wandb/sdk/wandb_config.py", line 164, in __getattr__
    raise AttributeError(
AttributeError: <class 'wandb.sdk.wandb_config.Config'> object has no attribute 'first_lin_lay'


Run 1z4nh0u0 errored: AttributeError("<class 'wandb.sdk.wandb_config.Config'> object has no attribute 'first_lin_lay'")
[34m[1mwandb[0m: [32m[41mERROR[0m Run 1z4nh0u0 errored: AttributeError("<class 'wandb.sdk.wandb_config.Config'> object has no attribute 'first_lin_lay'")
[34m[1mwandb[0m: Agent Starting Run: p4e7mwmy with config:
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	first_linear: 14336
[34m[1mwandb[0m: 	learning_rate: 0.00013284834658915703
[34m[1mwandb[0m: 	lin_layer_size: 150
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 3e-05


Traceback (most recent call last):
  File "/its/home/nn268/.local/lib/python3.8/site-packages/wandb/sdk/wandb_config.py", line 162, in __getattr__
    return self.__getitem__(key)
  File "/its/home/nn268/.local/lib/python3.8/site-packages/wandb/sdk/wandb_config.py", line 130, in __getitem__
    return self._items[key]
KeyError: 'first_lin_lay'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "<ipython-input-11-e0214b1a719c>", line 33, in train
    model = build_net(config.lin_layer_size,config.dropout, config.first_lin_lay,ks= config.kernal_size,in_chan= config.first_in_channel, pad=False).to(device)
  File "/its/home/nn268/.local/lib/python3.8/site-packages/wandb/sdk/wandb_config.py", line 164, in __getattr__
    raise AttributeError(
AttributeError: <class 'wandb.sdk.wandb_config.Config'> object has no attribute 'first_lin_lay'


Run p4e7mwmy errored: AttributeError("<class 'wandb.sdk.wandb_config.Config'> object has no attribute 'first_lin_lay'")
[34m[1mwandb[0m: [32m[41mERROR[0m Run p4e7mwmy errored: AttributeError("<class 'wandb.sdk.wandb_config.Config'> object has no attribute 'first_lin_lay'")
[34m[1mwandb[0m: Agent Starting Run: ic2i45i3 with config:
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	first_linear: 14336
[34m[1mwandb[0m: 	learning_rate: 0.001077753795138171
[34m[1mwandb[0m: 	lin_layer_size: 50
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 2e-05


Traceback (most recent call last):
  File "/its/home/nn268/.local/lib/python3.8/site-packages/wandb/sdk/wandb_config.py", line 162, in __getattr__
    return self.__getitem__(key)
  File "/its/home/nn268/.local/lib/python3.8/site-packages/wandb/sdk/wandb_config.py", line 130, in __getitem__
    return self._items[key]
KeyError: 'first_lin_lay'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "<ipython-input-11-e0214b1a719c>", line 33, in train
    model = build_net(config.lin_layer_size,config.dropout, config.first_lin_lay,ks= config.kernal_size,in_chan= config.first_in_channel, pad=False).to(device)
  File "/its/home/nn268/.local/lib/python3.8/site-packages/wandb/sdk/wandb_config.py", line 164, in __getattr__
    raise AttributeError(
AttributeError: <class 'wandb.sdk.wandb_config.Config'> object has no attribute 'first_lin_lay'


Run ic2i45i3 errored: AttributeError("<class 'wandb.sdk.wandb_config.Config'> object has no attribute 'first_lin_lay'")
[34m[1mwandb[0m: [32m[41mERROR[0m Run ic2i45i3 errored: AttributeError("<class 'wandb.sdk.wandb_config.Config'> object has no attribute 'first_lin_lay'")
Detected 3 failed runs in the first 60 seconds, killing sweep.
[34m[1mwandb[0m: [32m[41mERROR[0m Detected 3 failed runs in the first 60 seconds, killing sweep.
[34m[1mwandb[0m: To disable this check set WANDB_AGENT_DISABLE_FLAPPING=true
