# Define the configuration

In [37]:
config = {
    "method": "random",
    "metric": {
        "goal": "maximize",
        "name": "Macro-f1-score"
    },
    "parameters": {
        "num_hidden_layers": {
            "distribution": "int_uniform",
            "max": 5,
            "min": 1
        },
        "nrows": {
            "distribution": "int_uniform",
            "max": 1000,
            "min": 100
        },
        "batch_size": {
            "distribution": "int_uniform",
            "max": 200,
            "min": 50
        },
        "bow": {
            "distribution": "categorical",
            "values": ["tf_idf", "countVector"]
        },
        "learning_rate": {
            "distribution": "uniform",
            "max": 0.01,
            "min": 0.0001
        },
        "num_epochs": {
            "distribution": "int_uniform",
            "max": 60,
            "min": 5
        },
    }
}

# Define the function to run

In [38]:
import wandb
import torch
import torch.nn as nn
import numpy as np

from torch.utils.data import DataLoader

from dataSet import CustomDataDataSet
from rnn import RNN
from ffnn import FFNN
from Preprocessing import getDataFrameFromData
from BoW import TF_IDF, getCountVector
from networkFunctions import train, test

def sweep(config=None):
    torch.manual_seed(888)
    np.random.seed(888)
    
    with wandb.init(config=config):

        config = wandb.config

        train_df, train_labels = getDataFrameFromData("Archive/arxiv_train.csv", nrows=config.nrows)
        test_df, test_labels = getDataFrameFromData("Archive/arxiv_test.csv", nrows=config.nrows)

        if config.bow == "tf_idf":
            train_tensors, vocabulary = TF_IDF(train_df)
            test_tensors, _ = TF_IDF(test_df, vocabulary=vocabulary)
        elif config.bow == "countVector":
            train_tensors, vocabulary = getCountVector(train_df)
            test_tensors, _ = getCountVector(test_df, vocabulary=vocabulary)
        else:
            return
        
        train_dataset = CustomDataDataSet(train_tensors, train_labels)
        test_dataset = CustomDataDataSet(test_tensors, test_labels)
        train_dataloader = DataLoader(dataset=train_dataset,
                                    batch_size=config.batch_size,
                                    shuffle=True)
        test_dataloader = DataLoader(dataset=test_dataset,
                                    batch_size=config.batch_size,
                                    shuffle=True)
        
        model = FFNN(size_vocabulary=train_tensors.shape[1],
                     num_hidden_layers=config.num_hidden_layers)
            
        loss_function = nn.CrossEntropyLoss()

        optimiser = torch.optim.Adam(model.parameters(),
                                    lr=config.learning_rate)
    
        for _ in range(config.num_epochs):
            train(train_dataloader, model, optimiser, loss_function)
            test(test_dataloader, model, loss_function)




# Inistialise the sweep. NB! Skip if already created

In [39]:
sweep_id = wandb.sweep(config, project="RNNs and You")

Create sweep with ID: 940il575
Sweep URL: https://wandb.ai/j-soberg/RNNs%20and%20You/sweeps/940il575


# Run a sweep agent, using count to specify how many runs (optional). If sweep already exists, then copy the id from W&B

In [40]:
wandb.agent(sweep_id, sweep, count=5)

[34m[1mwandb[0m: Agent Starting Run: px1vu4ug with config:
[34m[1mwandb[0m: 	batch_size: 200
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	learning_rate: 0.003362877432133975
[34m[1mwandb[0m: 	nrows: 50
[34m[1mwandb[0m: 	num_epochs: 59
[34m[1mwandb[0m: 	num_hidden_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,▅▇██▇▅▅▇▇▅▆▅▃▅▅▅▅▆▆▆▆▅▅▅▅▅▅▄▄▄▄▂▂▁▅▄▅▄▃▂
Macro-f1-score,▅▆█▇▇▄▄▆▇▄▆▅▃▅▃▃▅▅▅▅▅▅▅▅▅▅▅▄▅▅▅▂▂▁▆▄▅▃▃▂
Precision,▄▅▇█▇▅▅▄▆▃▅▅▃▃▂▂▃▄▄▄▄▃▃▄▄▄▄▃▄▄▄▂▂▁▆▅▆▄▄▃
Recall,▄▆█▇▇▅▅▆▇▅▆▅▂▆▅▅▆▇▇▇▇▆▆▆▆▆▆▅▅▅▅▃▂▁▅▄▅▄▃▃
Test loss,▅▄▃▁▂▂▂▃▄▃▄▄▄▆▇█████▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▄▄▄▄▄
Training loss,██▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂

0,1
Accuracy,0.1
Macro-f1-score,0.08658
Precision,0.14325
Recall,0.13929
Test loss,2.29281
Training loss,2.154


[34m[1mwandb[0m: Agent Starting Run: 9029j459 with config:
[34m[1mwandb[0m: 	batch_size: 194
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	learning_rate: 0.0014113652122077478
[34m[1mwandb[0m: 	nrows: 29
[34m[1mwandb[0m: 	num_epochs: 35
[34m[1mwandb[0m: 	num_hidden_layers: 3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,▁▅▆▅▅▅▅▅▃▃▃▅█▆▅▃▁▁▁▁▁▁▁▃▃▃▃▁▁▁▃▃▁▁▃
Macro-f1-score,▂▄▅▄▄▄▄▄▁▁▁▅██▇▇▅▅▅▅▅▅▄▆▆▆▆▄▄▄▅▅▃▃▅
Precision,▂▃▅▅▅▄▅▅▁▁▁▅▆▇▇█▇▇▇▇▇▇▅▇▇▇▆▅▅▅▅▅▄▄▄
Recall,▂▄▆▄▄▄▄▄▂▂▂▄█▆▄▃▁▁▁▁▁▁▁▃▃▃▃▁▁▁▃▃▁▁▃
Test loss,████████▇▇▆▅▄▃▂▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂
Training loss,███████▇▆▅▄▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Accuracy,0.31034
Macro-f1-score,0.27714
Precision,0.285
Recall,0.29167
Test loss,2.26137
Training loss,2.00444


[34m[1mwandb[0m: Agent Starting Run: 4swma6es with config:
[34m[1mwandb[0m: 	batch_size: 145
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	learning_rate: 0.007120533196918315
[34m[1mwandb[0m: 	nrows: 33
[34m[1mwandb[0m: 	num_epochs: 74
[34m[1mwandb[0m: 	num_hidden_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,█▃▃▃▃▅▅▅▆▆███▆▅▅▅▅▃▃▃▃▁▁▁▃▃▃▁▃▃▃▃▃▃▃▃▃▃▃
Macro-f1-score,▆▂▃▄▄▅▄▄▅▆▇▇█▅▅▅▅▆▄▄▄▄▂▂▂▃▂▂▁▂▂▂▂▂▂▂▂▂▂▂
Precision,▆▁▂▄▄▃▂▁▄▆▅▅█▅▅▅▅▆▅▅▅▅▅▅▅▆▅▅▄▅▅▅▅▅▅▅▅▅▅▅
Recall,▇▅▅▅▅▆▆▆▇▇███▆▅▅▅▅▃▃▃▃▁▁▁▂▂▂▁▃▃▃▃▃▃▃▃▃▃▃
Test loss,██▇▆▄▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Training loss,██▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Accuracy,0.39394
Macro-f1-score,0.33393
Precision,0.40583
Recall,0.33
Test loss,2.17576
Training loss,2.04561


[34m[1mwandb[0m: Agent Starting Run: b0mfejb0 with config:
[34m[1mwandb[0m: 	batch_size: 85
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	learning_rate: 0.001279391856041666
[34m[1mwandb[0m: 	nrows: 100
[34m[1mwandb[0m: 	num_epochs: 9
[34m[1mwandb[0m: 	num_hidden_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,▁▇██▇▅▄▄▂
Macro-f1-score,▁▆█▇██▆▅▄
Precision,▁▆▆▇█▇▆▆▅
Recall,▁▅█▆▆▆▄▄▃
Test loss,█▄▂▁▁▂▄▅▅
Training loss,█▇▅▄▄▂▁▃▁

0,1
Accuracy,0.23
Macro-f1-score,0.20083
Precision,0.31437
Recall,0.22476
Test loss,2.26668
Training loss,2.03655


[34m[1mwandb[0m: Agent Starting Run: 4czmhfdk with config:
[34m[1mwandb[0m: 	batch_size: 197
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	learning_rate: 0.003827286998937608
[34m[1mwandb[0m: 	nrows: 79
[34m[1mwandb[0m: 	num_epochs: 59
[34m[1mwandb[0m: 	num_hidden_layers: 5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,▅█▇▁▁▁▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Macro-f1-score,▆█▅▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Precision,▄█▄▂▄▃▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Recall,▅█▆▁▁▁▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Test loss,▇▇▆▅▁▄▅█████████████████████████████████
Training loss,███▁▅▂▇█▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆

0,1
Accuracy,0.08861
Macro-f1-score,0.03429
Precision,0.10769
Recall,0.11111
Test loss,2.30552
Training loss,2.29286
