# Define the configuration

In [9]:
# Sweep definition handed to `wandb.sweep`: a random search that maximises the
# "Macro-f1-score" metric over the hyperparameter ranges listed below.
# NOTE(review): the recorded agent output further down shows sampled values
# outside these ranges (e.g. nrows 19, num_epochs 97), so the registered sweep
# was presumably created from an earlier version of this dict - confirm.
config = dict(
    method="random",
    metric=dict(goal="maximize", name="Macro-f1-score"),
    parameters=dict(
        # Network architecture
        hidden_size=dict(distribution="int_uniform", max=1000, min=10),
        num_layers=dict(distribution="int_uniform", max=2, min=1),
        # Data: rows read from each CSV, mini-batch size, text representation
        nrows=dict(distribution="int_uniform", max=1000, min=100),
        batch_size=dict(distribution="int_uniform", max=200, min=50),
        bow=dict(distribution="categorical", values=["tf_idf", "countVector"]),
        # Optimisation
        learning_rate=dict(distribution="uniform", max=0.01, min=0.0001),
        num_epochs=dict(distribution="int_uniform", max=60, min=5),
    ),
)

# Define the function to run

In [10]:
import wandb
import torch
import torch.nn as nn
import numpy as np

from torch.utils.data import DataLoader

from dataSet import CustomDataDataSet
from rnn import RNN
from ffnn import FFNN
from Preprocessing import getDataFrameFromData
from BoW import TF_IDF, getCountVector
from networkFunctions import train, test

def sweep(config=None):
    """Execute a single W&B sweep run: prepare data, train an RNN, evaluate it.

    Meant to be handed to ``wandb.agent`` as the run function. The
    hyperparameters read from ``wandb.config`` are ``nrows``, ``bow``,
    ``batch_size``, ``hidden_size``, ``num_layers``, ``learning_rate`` and
    ``num_epochs``.

    Args:
        config: Optional configuration forwarded to ``wandb.init``.

    Raises:
        ValueError: If ``config.bow`` is neither ``"tf_idf"`` nor
            ``"countVector"``.
    """
    # Fixed seeds so that runs differ only through their hyperparameters.
    torch.manual_seed(888)
    np.random.seed(888)

    with wandb.init(config=config):

        # From here on, read hyperparameters from the run's own configuration.
        config = wandb.config

        # Map each supported bag-of-words option to the function building it.
        # Both are called the same way: (df) for fitting and
        # (df, vocabulary=...) for re-using the training vocabulary.
        vectorisers = {"tf_idf": TF_IDF, "countVector": getCountVector}
        if config.bow not in vectorisers:
            # Fail loudly: silently returning would leave a run without any
            # metrics in the sweep.
            raise ValueError(
                f"Unsupported bow value {config.bow!r}; "
                f"expected one of {sorted(vectorisers)}"
            )
        vectorise = vectorisers[config.bow]

        # NOTE(review): the test set size follows the `nrows` hyperparameter
        # too, so scores of different runs are computed on different test
        # sets - confirm that this is intended.
        train_df, train_labels = getDataFrameFromData("Archive/arxiv_train.csv", nrows=config.nrows)
        test_df, test_labels = getDataFrameFromData("Archive/arxiv_test.csv", nrows=config.nrows)

        # The test data is encoded with the vocabulary obtained from the
        # training data.
        train_tensors, vocabulary = vectorise(train_df)
        test_tensors, _ = vectorise(test_df, vocabulary=vocabulary)

        train_dataset = CustomDataDataSet(train_tensors, train_labels)
        test_dataset = CustomDataDataSet(test_tensors, test_labels)
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=config.batch_size,
                                      shuffle=True)
        # Evaluation gains nothing from shuffling; a fixed order also avoids
        # drawing from the global RNG on every evaluation pass.
        test_dataloader = DataLoader(dataset=test_dataset,
                                     batch_size=config.batch_size,
                                     shuffle=False)

        # The first argument is the number of columns of the training tensors.
        model = RNN(train_tensors.shape[1],
                    config.hidden_size,
                    config.num_layers)

        loss_function = nn.CrossEntropyLoss()

        optimiser = torch.optim.Adam(model.parameters(),
                                     lr=config.learning_rate)

        # One training pass followed by one evaluation pass per epoch; the
        # helpers presumably log the losses and metrics to W&B - TODO confirm.
        for _ in range(config.num_epochs):
            train(train_dataloader, model, optimiser, loss_function)
            test(test_dataloader, model, loss_function)




# Initialise the sweep. NB! Skip this cell if the sweep has already been created

In [11]:
# Register the sweep definition with W&B and keep the returned sweep ID for the agent.
sweep_id = wandb.sweep(sweep=config, project="RNNs and You")

Create sweep with ID: g8ten13i
Sweep URL: https://wandb.ai/j-soberg/RNNs%20and%20You/sweeps/g8ten13i


# Run a sweep agent, using `count` to specify how many runs to execute (optional). If the sweep already exists, copy its ID from W&B instead of creating a new one

In [12]:
# Launch an agent that calls `sweep` for at most five runs of this sweep.
wandb.agent(sweep_id, function=sweep, count=5)

[34m[1mwandb[0m: Agent Starting Run: mmk0sxlh with config:
[34m[1mwandb[0m: 	batch_size: 198
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	hidden_size: 89
[34m[1mwandb[0m: 	learning_rate: 0.006204759594465084
[34m[1mwandb[0m: 	nrows: 31
[34m[1mwandb[0m: 	num_epochs: 7
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,▃▁▆▆▁█▁
Macro-f1-score,▁▂█▆▄▅▅
Precision,▁▁█▇▃▂▇
Recall,▃▄█▁▃▇▄
Test loss,██▆▄▅▁▃
Training loss,█▇▅▃▂▁▁

0,1
Accuracy,0.19355
Macro-f1-score,0.22056
Precision,0.22
Recall,0.27
Test loss,2.28597
Training loss,2.03789


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nqcpf1bo with config:
[34m[1mwandb[0m: 	batch_size: 193
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	hidden_size: 783
[34m[1mwandb[0m: 	learning_rate: 0.004069366626899545
[34m[1mwandb[0m: 	nrows: 70
[34m[1mwandb[0m: 	num_epochs: 26
[34m[1mwandb[0m: 	num_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,▆▃▄▆▆▆▄▃▄▁▆▄▆▃▆▆▇▆▇▅▇▇█▆█▆
Macro-f1-score,▅▃▄▆▅▅▅▃▄▁▅▄▆▄▆▆█▇▇▅▇▇▇▆█▆
Precision,▃▃▅▅▃▃▃▄▄▁▄▆▆▅▆▅█▆▇▅▅██▅▇▆
Recall,▅▂▄▆▅▅▅▄▄▁▅▄▇▃▇▆█▇█▅▇▆█▆█▆
Test loss,█▆▄▇▆▆▄▄▅▇▅▄▆▃▄▅▁▃▄▄▆▁▄▃▄▂
Training loss,█▅▃▂▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Accuracy,0.24286
Macro-f1-score,0.24355
Precision,0.33345
Recall,0.25194
Test loss,2.27117
Training loss,2.17842


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fg8jqbny with config:
[34m[1mwandb[0m: 	batch_size: 141
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	hidden_size: 480
[34m[1mwandb[0m: 	learning_rate: 0.007218284065834039
[34m[1mwandb[0m: 	nrows: 54
[34m[1mwandb[0m: 	num_epochs: 53
[34m[1mwandb[0m: 	num_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,█▃▄▂▂▃▄▃▅▂▅▄▂▄▃▃▄▃▃▂▆▄▃▃▅▂▃▃▁▃▃▄▂▃▄▃▃▅▃▃
Macro-f1-score,█▂▃▂▃▃▄▂▄▂▅▄▂▄▂▃▄▃▃▂▅▃▃▂▄▂▂▄▁▃▃▄▂▃▄▂▃▅▃▄
Precision,█▂▄▃▆▃▃▂▆▁▅▅▂▄▂▃▃▃▃▁▆▄▄▂▄▂▃▄▁▂▃▄▂▃▄▂▃▅▃▃
Recall,█▂▃▂▂▂▄▂▄▂▆▃▂▃▂▃▄▄▂▂▅▃▃▃▅▂▂▄▁▃▃▃▂▃▄▂▃▅▂▄
Test loss,▃▄▂▁▄▅▃▄▄▄▂▃▅▃▆▆▃▂▇▅▁▄▃▂▄▇▄▃█▅▅▅▆▃▅▆▆▂▆▃
Training loss,▆▅▄▅▅▄▄▆▆▅▃▅▇▅▆▁▃▇▅▅▆▇▅▃▆▅▅▇▄▆▄▅▄▄▆▄▅█▃▆

0,1
Accuracy,0.09259
Macro-f1-score,0.09833
Precision,0.08857
Recall,0.13056
Test loss,2.30197
Training loss,2.30561


[34m[1mwandb[0m: Agent Starting Run: 1aj3jzs3 with config:
[34m[1mwandb[0m: 	batch_size: 197
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	hidden_size: 905
[34m[1mwandb[0m: 	learning_rate: 0.005480165798342292
[34m[1mwandb[0m: 	nrows: 76
[34m[1mwandb[0m: 	num_epochs: 46
[34m[1mwandb[0m: 	num_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,█▃▆▅▅▅▃▇▅▆▃▅▁▆▅▂▃▃▅▃▃▄▄▅▄▅▅▃▃▂▃▅▆▆▃▆▆▅▅▁
Macro-f1-score,█▅▆▆▆▅▄▇▄▅▃▅▁▇▄▂▃▃▆▄▃▄▃▆▅▄▄▃▃▃▃▅▅▆▃▇▅▅▆▁
Precision,▆▄▅▅▄▃▃▄▃▃▂▃▁▄▆▂▂▅▅▅▂▃▂▄▃▃▃▅▃▄▂▃▃▄▃█▄▃▆▁
Recall,█▄▆▆▅▅▃▇▄▆▄▅▁█▄▂▄▃▇▄▃▄▃▆▅▅▅▃▃▃▃▆▅▇▃▆▆▆▇▁
Test loss,▅▆▂▄▆▆▇▂▅▅█▄▇▃▅▇▆▆▃▇█▄▆▄▃▅▅▆▆▇▇▂▆▄▇▄▂▁▂█
Training loss,▄▃▂▄▃▄▃▄▃▃▅▆▇▄▁▅▅▅▅▄█▄▃▅▂▆▅▆█▂▄▆▄▃▁▄▁▇▅▆

0,1
Accuracy,0.09211
Macro-f1-score,0.06675
Precision,0.06912
Recall,0.06894
Test loss,2.29956
Training loss,2.30907


[34m[1mwandb[0m: Agent Starting Run: ukes0vnp with config:
[34m[1mwandb[0m: 	batch_size: 126
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.008282732205694043
[34m[1mwandb[0m: 	nrows: 19
[34m[1mwandb[0m: 	num_epochs: 97
[34m[1mwandb[0m: 	num_layers: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,▅▂▂▁▅█▃▂▂▂▃▂▅▅▂▁▁▅▂▂▁▁▃▅▂▂▂▂▆▅▁▂▃▂▃▃▃▂▃▃
Macro-f1-score,▃▃▂▁▃█▂▁▁▁▃▁▃▅▁▁▁▅▁▁▁▁▃▄▁▁▁▁▅▅▁▁▂▁▂▂▃▁▂▂
Precision,▃▃▂▁▃█▂▁▁▁▄▁▃▇▁▁▁▇▁▁▁▁▃▅▁▁▁▁▆▄▁▁▁▁▁▁▂▁▁▁
Recall,▅▃▂▁▄█▃▂▂▂▄▃▆▆▃▁▁▇▃▃▁▁▅▅▃▂▃▃▆▆▁▃▃▂▃▃▆▂▃▃
Test loss,▆▆▆▆▆▁▆▆▆▆▄▆▆▄▅▅▆▃▆▆▆▄▅▄█▇▇▆▄▅▆▆▇█▇▇▄▆▇▄
Training loss,▆▄▁▆▃▄▇▃█▅▃▇▄▇▇▄▄▄▇▃▄▃▅▃▁▃▃▄▄▄▆▄▅▃▄▅▁▃▃▃

0,1
Accuracy,0.10526
Macro-f1-score,0.02222
Precision,0.0125
Recall,0.1
Test loss,2.27836
Training loss,2.25984
