# Sweeps notebook

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pprint
import os
import re
import json
import sys
import wandb
from model import *
from pytorch_lightning.loggers import WandbLogger
import tensorflow



In [2]:
# Sweep configuration: start with the search strategy
# (the options are 'random', 'grid', or 'bayes').
sweep_config = dict(method='random')

In [3]:
# Metric entry for the sweep: validation loss, to be minimized
# ('goal' is either 'minimize' or 'maximize').
metric = dict(name='val_loss', goal='minimize')
# Attach the metric to the sweep configuration.
sweep_config.update(metric=metric)

In [4]:
# Fixed run metadata: recorded with every trial but never varied.
epochs = 30
architecture = 'CNN'
dataset = 'Poems'

# Hyperparameter search space, built in a single literal.
parameters_dict = {
    # discrete choices
    'optimizer': {'values': ['adam', 'sgd']},
    'embedding_dim': {'values': [128, 256, 512]},
    # continuous variable: flat (uniform) distribution between 0.0001 and 0.1
    'learning_rate': {
        'distribution': 'uniform',
        'min': 0.0001,
        'max': 0.1,
    },
    # values we want to track but not change: give exactly one 'value'
    'epochs': {'value': epochs},
    'architecture': {'value': architecture},
    'dataset': {'value': dataset},
}

# Attach the search space to the sweep configuration.
sweep_config['parameters'] = parameters_dict

In [5]:
# Show the assembled sweep configuration in pretty-printed form.
print(pprint.pformat(sweep_config))

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'val_loss'},
 'parameters': {'architecture': {'value': 'CNN'},
                'dataset': {'value': 'Poems'},
                'embedding_dim': {'values': [128, 256, 512]},
                'epochs': {'value': 30},
                'learning_rate': {'distribution': 'uniform',
                                  'max': 0.1,
                                  'min': 0.0001},
                'optimizer': {'values': ['adam', 'sgd']}}}


In [6]:
# Log in to Weights & Biases before the sweep is created and run in the
# cells below.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjjdsantiago3[0m ([33mmsds_mlops2023_lt2[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [7]:
# Create the sweep described by sweep_config in the "mlo-final-project"
# project; the returned ID is what wandb.agent is given to run trials.
sweep_id = wandb.sweep(sweep_config, project="mlo-final-project")

Create sweep with ID: w94pa8s6
Sweep URL: https://wandb.ai/msds_mlops2023_lt2/mlo-final-project/sweeps/w94pa8s6


In [8]:
# define hyperparam search function
def hyperparam_search(config=None):
    """Run one sweep trial: train the poem classifier and log its metrics.

    Written to be handed to ``wandb.agent``, which invokes it for each trial.

    Args:
        config: Optional configuration forwarded to ``wandb.init``. The
            hyperparameters actually used are read from ``wandb.config``
            after the run is initialized: ``learning_rate``, ``optimizer``,
            ``embedding_dim`` and ``epochs``.

    Logs:
        ``train_loss``, ``train_acc``, ``val_loss`` and ``val_acc`` with one
        ``wandb.log`` call per epoch recorded in the training history,
        followed by a single call with ``test_loss`` and ``test_acc``.
    """
    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, this config will be set by the
        # Sweep Controller
        config = wandb.config

        # Build the model and run its data pipeline before training.
        _model = poem_classifier_model()
        _model.load_data()
        _model.preprocess()
        _model.train(
            embd_dim=config.embedding_dim,
            epochs=config.epochs,
            lr=config.learning_rate,
            optimizer=config.optimizer,
        )

        # Walk the epochs that were actually recorded instead of
        # range(config.epochs): indexing by the configured count raises
        # IndexError when the history is shorter than requested
        # (e.g. if training stops early).
        history = _model.trained_model.history
        recorded_epochs = zip(
            history['loss'],
            history['acc'],
            history['val_loss'],
            history['val_acc'],
        )
        for train_loss, train_acc, val_loss, val_acc in recorded_epochs:
            wandb.log({"train_loss": train_loss,
                       "train_acc": train_acc,
                       "val_loss": val_loss,
                       "val_acc": val_acc})

        # NOTE(review): assumes test() returns (loss, accuracy) in that
        # order, as the indexing below implies — confirm against model.py.
        results = _model.test()
        wandb.log({"test_loss": results[0], "test_acc": results[1]})

# Launch the sweep agent: it calls hyperparam_search for each trial of the
# sweep identified by sweep_id.
wandb.agent(sweep_id, hyperparam_search, count=10) # count - number of trials to run

[34m[1mwandb[0m: Agent Starting Run: oia9cawn with config:
[34m[1mwandb[0m: 	architecture: CNN
[34m[1mwandb[0m: 	dataset: Poems
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	learning_rate: 0.009654640739687003
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  self.df_train['Poem'] = self.df_train['Poem'].str.replace('shade.When', 'shade. When')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('afraid.Now', 'afraid. Now')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('afraid.Now,', 'afraid. Now,')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('Big Game.Bigger', 'Big Game. Bigger')
[nltk_data] Downloading package stopwords to C:\Users\Jesli's
[nltk_data]     Laptop\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
{'loss': [1.3557945489883423, 1.2712337970733643, 1.1378117799758911, 0.9841220378875732, 0.8748316764831543, 0.7654136419296265, 0.6757111549377441, 0.6081585884094238, 0.5425299406051636, 0.4939087927341461, 0.45133116841316223, 0.4482845664024353, 0.40440648794174194, 0.35823753476142883, 0.34378066658973694, 0.31775376200675964, 0.3044026494026184, 0.2828969359397888, 0.24840952455997467, 0.26611682772636414, 0.23987826704978943, 0.23478569090366364, 0.21785522997379303, 0.2172151505947113, 0.21710717678070068, 0.20698589086532593, 0.21353696286678314, 0.20999640226364136, 0.19212615489959717, 0.18654842674732208], 'acc': [0.310911804437

0,1
test_acc,▁
test_loss,▁
train_acc,▁▃▃▄▅▆▆▆▇▇▇▇▇▇██▇█████████████
train_loss,█▇▇▆▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▆█▆██▆▆▆▅▅▆▆▆▅▆▆▆▄▇▅▆▆▆▅▄▅▆▆▅
val_loss,▁▁▁▁▁▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▆▇▇▇█████

0,1
test_acc,0.33333
test_loss,2.91291
train_acc,0.92676
train_loss,0.18655
val_acc,0.35714
val_loss,3.39523


[34m[1mwandb[0m: Agent Starting Run: e0m21s3t with config:
[34m[1mwandb[0m: 	architecture: CNN
[34m[1mwandb[0m: 	dataset: Poems
[34m[1mwandb[0m: 	embedding_dim: 512
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	learning_rate: 0.02808494175379332
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/30


  self.df_train['Poem'] = self.df_train['Poem'].str.replace('shade.When', 'shade. When')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('afraid.Now', 'afraid. Now')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('afraid.Now,', 'afraid. Now,')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('Big Game.Bigger', 'Big Game. Bigger')
[nltk_data] Downloading package stopwords to C:\Users\Jesli's
[nltk_data]     Laptop\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
{'loss': [1.3855904340744019, 1.1002875566482544, 0.8623965382575989, 0.6973595023155212, 0.5562048554420471, 0.4759288728237152, 0.5132670998573303, 0.5551157593727112, 0.37232211232185364, 0.3913919925689697, 0.36700159311294556, 0.3947390913963318, 0.3411308228969574, 0.38246607780456543, 0.36294829845428467, 0.3544546365737915, 0.35878920555114746, 0.2559962570667267, 0.33474141359329224, 0.26699212193489075, 0.2796787917613983, 0.2946580648422241, 0.31691837310791016, 0.29700133204460144, 0.2745542526245117, 0.28683018684387207, 0.26090213656425476, 0.2605212926864624, 0.2632770538330078, 0.2816820740699768], 'acc': [0.3437967002391815, 0.50971597

0,1
test_acc,▁
test_loss,▁
train_acc,▁▃▅▅▆▇▇▆▇▇▇▇█▇▇███████████████
train_loss,█▆▅▄▃▂▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▂▅▅▅▅▆▅▄▁▂█▅▄▅▃▆▅▅▄▃▅▄▄▆▆▄▂▂▆▄
val_loss,▁▁▂▂▂▃▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇█▇█▇▇███

0,1
test_acc,0.26
test_loss,5.34036
train_acc,0.91031
train_loss,0.28168
val_acc,0.39286
val_loss,5.16435


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0poerlim with config:
[34m[1mwandb[0m: 	architecture: CNN
[34m[1mwandb[0m: 	dataset: Poems
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	learning_rate: 0.07808868766322856
[34m[1mwandb[0m: 	optimizer: sgd
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/30


  self.df_train['Poem'] = self.df_train['Poem'].str.replace('shade.When', 'shade. When')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('afraid.Now', 'afraid. Now')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('afraid.Now,', 'afraid. Now,')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('Big Game.Bigger', 'Big Game. Bigger')
[nltk_data] Downloading package stopwords to C:\Users\Jesli's
[nltk_data]     Laptop\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
{'loss': [1.382807970046997, 1.376538634300232, 1.3738027811050415, 1.3723978996276855, 1.3719202280044556, 1.3712555170059204, 1.3718206882476807, 1.3698400259017944, 1.3707811832427979, 1.3700497150421143, 1.3675329685211182, 1.3697049617767334, 1.370110273361206, 1.3689026832580566, 1.3676432371139526, 1.3683067560195923, 1.3683931827545166, 1.369033694267273, 1.3674838542938232, 1.3671972751617432, 1.365566611289978, 1.3676074743270874, 1.366487979888916, 1.3664337396621704, 1.3663092851638794, 1.3669580221176147, 1.3672477006912231, 1.3660056591033936, 1.3641823530197144, 1.3670399188995361], 'acc': [0.2556053698062897, 0.28550073504447937, 0.2899

0,1
test_acc,▁
test_loss,▁
train_acc,▁▅▆▃▅▅▅▇▄▆█▄▆▃▅▅▆▅▆▇█▄▅▇▅▄▄▇▆▇
train_loss,█▆▅▄▄▄▄▃▃▃▂▃▃▃▂▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂
val_acc,▆▂▂▂▂▂▂▂▁▂▂▂▂▇▆▄▇▇▇▆▇▇▇▄▄▇█▇▅▇
val_loss,█▆▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
test_acc,0.11333
test_loss,1.56987
train_acc,0.29746
train_loss,1.36704
val_acc,0.34524
val_loss,1.33799


[34m[1mwandb[0m: Agent Starting Run: 01u4o2jb with config:
[34m[1mwandb[0m: 	architecture: CNN
[34m[1mwandb[0m: 	dataset: Poems
[34m[1mwandb[0m: 	embedding_dim: 128
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	learning_rate: 0.06779997706885234
[34m[1mwandb[0m: 	optimizer: sgd
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/30


  self.df_train['Poem'] = self.df_train['Poem'].str.replace('shade.When', 'shade. When')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('afraid.Now', 'afraid. Now')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('afraid.Now,', 'afraid. Now,')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('Big Game.Bigger', 'Big Game. Bigger')
[nltk_data] Downloading package stopwords to C:\Users\Jesli's
[nltk_data]     Laptop\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
{'loss': [1.3806072473526, 1.3725793361663818, 1.3697501420974731, 1.3681180477142334, 1.3673616647720337, 1.367000937461853, 1.3665555715560913, 1.3666454553604126, 1.364594578742981, 1.3647093772888184, 1.3643940687179565, 1.363925814628601, 1.3645553588867188, 1.3647733926773071, 1.3628594875335693, 1.3634876012802124, 1.362831950187683, 1.3611506223678589, 1.3629497289657593, 1.3619410991668701, 1.3626422882080078, 1.3616015911102295, 1.361124038696289, 1.3612526655197144, 1.360807180404663, 1.3607882261276245, 1.3608750104904175, 1.358074426651001, 1.3594741821289062, 1.361010193824768], 'acc': [0.27204781770706177, 0.2825112044811249, 0.288490295

0,1
test_acc,▁
test_loss,▁
train_acc,▂▃▄▃▂▂▃▁▅▆▇▄▆▅▆▅▄▅▅▃▃▅▄▅▄▅▅█▇▄
train_loss,█▆▅▄▄▄▄▄▃▃▃▃▃▃▂▃▂▂▃▂▂▂▂▂▂▂▂▁▁▂
val_acc,█▇▇▆▆▁▇▅▆▄▄▆▅▇▇▅▅█▆▇▅▄▅▆▅▇▅▅█▇
val_loss,█▅▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▂▂▂▁▂▁▁▂▁▁

0,1
test_acc,0.14
test_loss,1.60886
train_acc,0.29447
train_loss,1.36101
val_acc,0.30952
val_loss,1.36017


[34m[1mwandb[0m: Agent Starting Run: e1tet2j1 with config:
[34m[1mwandb[0m: 	architecture: CNN
[34m[1mwandb[0m: 	dataset: Poems
[34m[1mwandb[0m: 	embedding_dim: 512
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	learning_rate: 0.02439222145389936
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/30


  self.df_train['Poem'] = self.df_train['Poem'].str.replace('shade.When', 'shade. When')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('afraid.Now', 'afraid. Now')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('afraid.Now,', 'afraid. Now,')
  self.df_train['Poem'] = self.df_train['Poem'].str.replace('Big Game.Bigger', 'Big Game. Bigger')
[nltk_data] Downloading package stopwords to C:\Users\Jesli's
[nltk_data]     Laptop\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30

In [None]:
# End the current wandb run, if one is still open in this kernel.
wandb.finish()