# Define the configuration

In [13]:
# Search space for the W&B sweep. Every trial draws one value per parameter
# at random ("method": "random"); trials are ranked by the logged metric
# named "Macro-f1-score", which the sweep tries to maximise.
config = dict(
    method="random",
    metric=dict(goal="maximize", name="Macro-f1-score"),
    parameters=dict(
        # Model capacity.
        hidden_size=dict(distribution="int_uniform", max=1000, min=10),
        num_layers=dict(distribution="int_uniform", max=2, min=1),
        # Amount of data read and how it is batched.
        nrows=dict(distribution="int_uniform", max=1000, min=100),
        batch_size=dict(distribution="int_uniform", max=200, min=50),
        # Bag-of-words representation used for the input text.
        bow=dict(distribution="categorical", values=["tf_idf", "countVector"]),
        # Optimisation settings.
        learning_rate=dict(distribution="uniform", max=0.01, min=0.0001),
        num_epochs=dict(distribution="int_uniform", max=60, min=5),
    ),
)

# Define the function to run

In [14]:
import wandb
import torch
import torch.nn as nn
import numpy as np

from torch.utils.data import DataLoader

from dataSet import CustomDataDataSet
from rnn import RNN
from ffnn import FFNN
from Preprocessing import getDataFrameFromData
from BoW import TF_IDF, getCountVector
from networkFunctions import train, test

def sweep(config=None):
    """Run one trial of the W&B hyperparameter sweep.

    Seeds torch and numpy, opens a W&B run, reads ``nrows`` rows from the
    train and test CSV files, converts them to bag-of-words tensors with the
    representation selected by ``bow``, and trains an ``RNN`` for
    ``num_epochs`` epochs, calling ``test`` on the test loader after every
    epoch.

    Args:
        config: Optional default configuration forwarded to ``wandb.init``.
            Inside the run, the hyperparameters are read from
            ``wandb.config`` (hidden_size, num_layers, nrows, batch_size,
            bow, learning_rate, num_epochs).

    Raises:
        ValueError: If ``bow`` is not ``"tf_idf"`` or ``"countVector"``.
    """
    torch.manual_seed(888)
    np.random.seed(888)

    with wandb.init(config=config):
        # From here on use the values chosen for this particular run.
        config = wandb.config

        # Both vectorisers are called the same way: fit on the training frame,
        # then reuse the resulting vocabulary for the test frame.
        vectorisers = {"tf_idf": TF_IDF, "countVector": getCountVector}
        if config.bow not in vectorisers:
            # Fail loudly instead of finishing an empty run that would look
            # like a successful trial without any metrics.
            raise ValueError(
                f"Unknown bow option {config.bow!r}; "
                f"expected one of {sorted(vectorisers)}"
            )
        vectorise = vectorisers[config.bow]

        train_df, train_labels = getDataFrameFromData("Archive/arxiv_train.csv", nrows=config.nrows)
        test_df, test_labels = getDataFrameFromData("Archive/arxiv_test.csv", nrows=config.nrows)

        train_tensors, vocabulary = vectorise(train_df)
        test_tensors, _ = vectorise(test_df, vocabulary=vocabulary)

        train_dataset = CustomDataDataSet(train_tensors, train_labels)
        test_dataset = CustomDataDataSet(test_tensors, test_labels)
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=config.batch_size,
                                      shuffle=True)
        # Evaluation does not need a random order, so keep the test batches
        # in a fixed order.
        test_dataloader = DataLoader(dataset=test_dataset,
                                     batch_size=config.batch_size,
                                     shuffle=False)

        # The second dimension of the training tensors is passed as the
        # model's first argument (presumably the input feature size - confirm
        # against the RNN constructor).
        model = RNN(train_tensors.shape[1],
                    config.hidden_size,
                    config.num_layers)

        loss_function = nn.CrossEntropyLoss()

        optimiser = torch.optim.Adam(model.parameters(),
                                     lr=config.learning_rate)

        # One training pass followed by one evaluation pass per epoch.
        for _ in range(config.num_epochs):
            train(train_dataloader, model, optimiser, loss_function)
            test(test_dataloader, model, loss_function)




# Initialise the sweep. NB! Skip this cell if the sweep has already been created

In [15]:
# Register the sweep defined by `config` with W&B and keep the returned id,
# which the agent cell needs in order to pull runs from this sweep.
sweep_id = wandb.sweep(
    config,
    entity="jespermagnus",
    project="Epochs and You",
)

Create sweep with ID: cxle7d20
Sweep URL: https://wandb.ai/jespermagnus/Epochs%20and%20You/sweeps/cxle7d20


# Run a sweep agent, optionally passing `count` to limit how many runs it executes. If the sweep already exists, copy its ID from W&B into `sweep_id`

In [16]:
# Start an agent that repeatedly calls `sweep` with configurations drawn from
# the sweep identified by `sweep_id`. Entity and project are passed explicitly
# so the agent does not depend on the sweep-creation cell having been run in
# this kernel session. Add `count=N` to cap the number of runs.
wandb.agent(
    sweep_id,
    sweep,
    entity="jespermagnus",
    project="Epochs and You",
)

[34m[1mwandb[0m: Agent Starting Run: uzhx6ldy with config:
[34m[1mwandb[0m: 	batch_size: 119
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	hidden_size: 970
[34m[1mwandb[0m: 	learning_rate: 0.007926661884061126
[34m[1mwandb[0m: 	nrows: 224
[34m[1mwandb[0m: 	num_epochs: 32
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
Timed out waiting for wandb service to start after 30.0 seconds. Try increasing the timeout with the `_service_wait` setting.
Traceback (most recent call last):
  File "C:\Users\jespe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wandb\sdk\wandb_init.py", line 1172, in init
    wi.setup(kwargs)
  File "C:\Users\jespe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wandb\s

0,1
Accuracy,▄▅▅▆▇▂▅▃▃▄▃▆▆▄▆▃▂█▄▃▅▄▄▃▃▃▃▄▄▄▁▂▆
Macro-f1-score,▄▆▄▇▇▁▅▂▃▃▃▇▆▃▄▃▂█▅▄▅▅▄▄▄▃▃▅▄▃▁▂▅
Precision,▂▃▃▇▅▄▆▁▂▂▄█▆▂▅▂▆▄▅▃▃▄▂▄▂▂▂█▄▄▁▁▃
Recall,▃▄▄▅▆▂▅▃▃▄▃▆▆▃▅▂▁█▄▃▅▄▄▃▃▃▃▄▄▃▁▂▅
Test loss,▅▃▆▂▂▁▅▄▆█▂▆▃▅▆▅▂▆▅▄▅▁▃▄▅▃▅▁▅▆█▆▇
Training loss,▅▄▆▅▆▅▆▅▄▆██▂▇▁▅▅▅▅▆▆█▆▆█▇█▆▃▆▆▄▇

0,1
Accuracy,0.12868
Macro-f1-score,0.08791
Precision,0.08391
Recall,0.12698
Test loss,2.30834
Training loss,2.30537


[34m[1mwandb[0m: Agent Starting Run: whnbzhl3 with config:
[34m[1mwandb[0m: 	batch_size: 163
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	hidden_size: 704
[34m[1mwandb[0m: 	learning_rate: 0.0014765994335545415
[34m[1mwandb[0m: 	nrows: 131
[34m[1mwandb[0m: 	num_epochs: 9
[34m[1mwandb[0m: 	num_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
Timed out waiting for wandb service to start after 30.0 seconds. Try increasing the timeout with the `_service_wait` setting.
Traceback (most recent call last):
  File "C:\Users\jespe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wandb\sdk\wandb_init.py", line 1172, in init
    wi.setup(kwargs)
  File "C:\Users\jespe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wandb\sdk\wa

0,1
Accuracy,▃▄▂▂▄▂▁▁▁▄▅▅▆▅▆█▇▇▇▆▇█▆▇▇▇▄▇▆▆▃▁▄▄▄▃▅▅▆▅
Macro-f1-score,▄▄▃▃▄▂▂▁▂▄▅▅▆▆▆█▇▇▇▆▇█▇▇▇▇▅▇▆▆▃▂▅▅▄▃▆▅▆▅
Precision,▄▄▃▂▄▃▂▁▃▆▆▅▇▆▆▇▇▇▇▆▇█▇▇█▆▆▇▇▇▄▃▅▆▆▅▇▇▇▆
Recall,▃▄▃▃▄▂▁▁▁▄▆▅▆▅▆█▇▇▇▇██▆▇▇▇▅▇▇▆▃▂▅▅▄▃▆▅▆▅
Test loss,▄▃▂▃▁█▅▅▄▄▅▂▄▆▄▅▄▆▂▁▅▄▂▃▄▄▄▄▅▄▄▄▄▁▆▄▂▅▄▅
Training loss,█▄▁▁▄▄▃▄▅▄▃▂▄▃▆▄▄▃▃▃▃▃▂▃▂▂▂▄▅▁▄▃▃▄▆▂▆▂▅▃

0,1
Accuracy,0.46903
Macro-f1-score,0.4442
Precision,0.49985
Recall,0.46073
Test loss,2.26822
Training loss,2.12239


[34m[1mwandb[0m: Agent Starting Run: tu61nm5f with config:
[34m[1mwandb[0m: 	batch_size: 85
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	hidden_size: 541
[34m[1mwandb[0m: 	learning_rate: 0.008416324837240126
[34m[1mwandb[0m: 	nrows: 377
[34m[1mwandb[0m: 	num_epochs: 37
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,▂▂▅▃▁▂▅▄▅▆▇▇▅▅▅▇▄▇▆▆▇▆▆▅█▄▅▇█▅▄▆▆▇▅▆▇
Macro-f1-score,▂▃▄▃▁▂▅▄▄▇▆▆▅▅▆▆▅▆▆▆▆▆▆▆█▄▅▇█▅▄▆▆█▅▆▆
Precision,▃▃▅▃▁▃▄▄▄▆▅▅▅▄▅▅▄▆▆▅▅▆▇▇█▅▅▇▇▅▅▆▅█▅▅▅
Recall,▃▃▅▃▁▃▅▄▅▆▆▆▅▅▆▆▅▇▇▆▇▆▆▆▇▄▅▇█▅▄▆▆█▄▆▇
Test loss,█▅▃▃▅▄▆▅▆▄▆▄▄▅▄▁▃▆▃▄▆▄▃▁▂▃▃▃▃▃▄▅▄▂▄▂▄
Training loss,█▃▁▂▁▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁

0,1
Accuracy,0.53581
Macro-f1-score,0.52631
Precision,0.54394
Recall,0.53092
Test loss,2.24059
Training loss,2.17401


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fyfes9pl with config:
[34m[1mwandb[0m: 	batch_size: 114
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	hidden_size: 100
[34m[1mwandb[0m: 	learning_rate: 0.009725182731641076
[34m[1mwandb[0m: 	nrows: 390
[34m[1mwandb[0m: 	num_epochs: 54
[34m[1mwandb[0m: 	num_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
Timed out waiting for wandb service to start after 30.0 seconds. Try increasing the timeout with the `_service_wait` setting.
Traceback (most recent call last):
  File "C:\Users\jespe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wandb\sdk\wandb_init.py", line 1172, in init
    wi.setup(kwargs)
  File "C:\Users\jespe\AppData\Local\Packages\PythonSoftwareFoundation.Py

0,1
Accuracy,▁▄▅▄▄▄▅▅▅▅▄▄▅▅█▇▇█▇
Macro-f1-score,▁▄▅▄▄▃▅▄▅▅▄▄▄▅▇▇▇█▇
Precision,▁▄▅▃▄▄▅▄▅▅▄▄▄▅▇▇▇█▇
Recall,▁▄▅▄▄▃▅▅▅▅▄▄▄▅█▇▇█▇
Test loss,█▅▄▂▂▂▁▃▂▂▂▂▂▃▂▁▂▂▁
Training loss,█▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Accuracy,0.52778
Macro-f1-score,0.53066
Precision,0.56981
Recall,0.52587
Test loss,2.25322
Training loss,2.24121


[34m[1mwandb[0m: Agent Starting Run: rixan2b3 with config:
[34m[1mwandb[0m: 	batch_size: 94
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	hidden_size: 630
[34m[1mwandb[0m: 	learning_rate: 0.00426548683291671
[34m[1mwandb[0m: 	nrows: 928
[34m[1mwandb[0m: 	num_epochs: 48
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,▁▁▄▅▅▅▆▅▇▅▆▅▆▆▇▇▇▇▇▇█▇▇███▇▇▇▇▇█▇██▇▇▇▇█
Macro-f1-score,▁▁▄▅▅▆▆▆▇▆▆▆▆▆▇▇▇▇▇▇█▇▇███▇▇▇▇▇█▇██▇▇▇██
Precision,▂▁▄▆▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇█▇▇▇███▇█▇▇██▇██▇▇▇▇█
Recall,▁▁▄▅▅▆▆▆▇▆▆▆▆▆▇▆▇▇▇▇█▇▇█▇█▇▇▇▇▇█▇██▇▇▇▇█
Test loss,█▇▅▄▃▃▃▃▂▂▃▃▂▃▂▂▂▁▁▁▂▂▃▂▁▁▂▂▃▂▂▁▂▂▂▁▁▂▁▁
Training loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Accuracy,0.62716
Macro-f1-score,0.64103
Precision,0.68464
Recall,0.62911
Test loss,2.22603
Training loss,2.20676


[34m[1mwandb[0m: Agent Starting Run: uzhx6ldy with config:
[34m[1mwandb[0m: 	batch_size: 119
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	hidden_size: 970
[34m[1mwandb[0m: 	learning_rate: 0.007926661884061126
[34m[1mwandb[0m: 	nrows: 224
[34m[1mwandb[0m: 	num_epochs: 32
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
Timed out waiting for wandb service to start after 30.0 seconds. Try increasing the timeout with the `_service_wait` setting.
Traceback (most recent call last):
  File "C:\Users\jespe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wandb\sdk\wandb_init.py", line 1172, in init
    wi.setup(kwargs)
  File "C:\Users\jespe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wandb\s

0,1
Accuracy,▁▅▃▄▄█▅▃▃
Macro-f1-score,▁▄▃▄▄█▄▃▃
Precision,▁▄▂▄▃█▅▃▃
Recall,▁▄▃▄▃█▄▃▃
Test loss,██▇▇▆▅▃▂▁
Training loss,██▇▇▆▅▄▃▁

0,1
Accuracy,0.30534
Macro-f1-score,0.28369
Precision,0.28423
Recall,0.31506
Test loss,2.29996
Training loss,2.29043


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fyfes9pl with config:
[34m[1mwandb[0m: 	batch_size: 114
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	hidden_size: 100
[34m[1mwandb[0m: 	learning_rate: 0.009725182731641076
[34m[1mwandb[0m: 	nrows: 390
[34m[1mwandb[0m: 	num_epochs: 54
[34m[1mwandb[0m: 	num_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
Timed out waiting for wandb service to start after 30.0 seconds. Try increasing the timeout with the `_service_wait` setting.
Traceback (most recent call last):
  File "C:\Users\jespe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\wandb\sdk\wandb_init.py", line 1172, in init
    wi.setup(kwargs)
  File "C:\Users\jespe\AppData\Local\Packages\PythonSoftwareFoundation.Py

0,1
Accuracy,▅█▇▃▃▃▃▂▃▃▁▄▄▃▁▅▅▅▄▄▄▄▄▄▄▇▆▆▇▆▇▆▅▄▆▃▄▄▄▅
Macro-f1-score,▅█▆▂▃▃▃▂▂▃▂▄▃▄▁▄▄▅▄▄▄▄▄▄▄▇▆▆▇▆▇▆▆▅▆▃▄▄▄▅
Precision,▄█▅▂▂▂▂▂▃▃▁▃▂▃▂▅▄▄▄▅▄▃▄▄▄▇▆▆▆▅▇▆▅▅▆▄▄▅▄▄
Recall,▅█▆▃▃▃▃▃▃▃▂▄▃▄▁▅▄▅▄▄▄▄▃▄▄▇▆▅█▅▆▆▅▄▆▃▄▃▄▄
Test loss,█▇▇▇▇▇▇▇▆█▆▅▂▇▃▂▄▂▄▃▇▃▂▄▂▂▁▄▄▃▆▄▂▄▄▆▅▄▆▃
Training loss,███▇▇▆▆▇▇▅▅▆▅▅▄▄▃▆▄▅▅▄▃▂▅▅▃▂▃▄▂▂▄▁▃▂▂▃▄▁

0,1
Accuracy,0.26163
Macro-f1-score,0.23626
Precision,0.24217
Recall,0.23977
Test loss,2.24061
Training loss,2.07126


[34m[1mwandb[0m: Agent Starting Run: 7vurowpb with config:
[34m[1mwandb[0m: 	batch_size: 88
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	hidden_size: 743
[34m[1mwandb[0m: 	learning_rate: 0.0018407446904875793
[34m[1mwandb[0m: 	nrows: 575
[34m[1mwandb[0m: 	num_epochs: 46
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
