# Define the configuration

In [5]:
# Sweep definition passed to wandb.sweep: random search that maximises the
# "Macro-f1-score" metric over the hyperparameters listed below.
config = dict(
    method="random",
    metric=dict(goal="maximize", name="Macro-f1-score"),
    parameters=dict(
        # Forwarded to FFNN as num_hidden_layers.
        num_hidden_layers=dict(distribution="int_uniform", max=5, min=1),
        # Forwarded as nrows= when reading both the train and the test CSV.
        nrows=dict(distribution="int_uniform", max=1000, min=100),
        # Batch size of both DataLoaders.
        batch_size=dict(distribution="int_uniform", max=200, min=50),
        # Selects the bag-of-words encoder: TF_IDF or getCountVector.
        bow=dict(distribution="categorical", values=["tf_idf", "countVector"]),
        # Learning rate handed to the Adam optimiser.
        learning_rate=dict(distribution="uniform", max=0.01, min=0.0001),
        # Number of train/test passes per run.
        num_epochs=dict(distribution="int_uniform", max=60, min=5),
    ),
)

# Define the function to run

In [2]:
import wandb
import torch
import torch.nn as nn
import numpy as np

from torch.utils.data import DataLoader

from dataSet import CustomDataDataSet
from rnn import RNN
from ffnn import FFNN
from Preprocessing import getDataFrameFromData
from BoW import TF_IDF, getCountVector
from networkFunctions import train, test

def sweep(config=None):
    """Train and evaluate one FFNN run inside a W&B run.

    The hyperparameters read from the run config are ``nrows``, ``bow``,
    ``batch_size``, ``num_hidden_layers``, ``learning_rate`` and
    ``num_epochs``.

    Args:
        config: Forwarded unchanged to ``wandb.init``. Defaults to ``None``,
            which is how ``wandb.agent`` invokes this function.

    Raises:
        ValueError: If ``bow`` is neither ``"tf_idf"`` nor ``"countVector"``.
            Previously this case returned silently after loading the data,
            leaving a run with no metrics and no error.
    """
    # Fixed seeds so every run starts from the same torch / numpy RNG state.
    torch.manual_seed(888)
    np.random.seed(888)

    with wandb.init(config=config) as run:
        # Read hyperparameters from the run itself instead of rebinding the
        # `config` argument.
        cfg = run.config

        # Validate the encoder choice before any data is read so a bad
        # configuration fails fast and visibly.
        vectorisers = {"tf_idf": TF_IDF, "countVector": getCountVector}
        if cfg.bow not in vectorisers:
            raise ValueError(
                f"Unsupported bow setting {cfg.bow!r}; "
                f"expected one of {sorted(vectorisers)}"
            )
        vectorise = vectorisers[cfg.bow]

        # NOTE(review): nrows is applied to the test file as well, so the
        # evaluation set differs between runs with different nrows -- confirm
        # this is intended when comparing runs.
        train_df, train_labels = getDataFrameFromData("Archive/arxiv_train.csv", nrows=cfg.nrows)
        test_df, test_labels = getDataFrameFromData("Archive/arxiv_test.csv", nrows=cfg.nrows)

        # The vocabulary returned for the training data is reused for the
        # test data so both are encoded against the same vocabulary.
        train_tensors, vocabulary = vectorise(train_df)
        test_tensors, _ = vectorise(test_df, vocabulary=vocabulary)

        train_dataset = CustomDataDataSet(train_tensors, train_labels)
        test_dataset = CustomDataDataSet(test_tensors, test_labels)
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=cfg.batch_size,
                                      shuffle=True)
        # NOTE(review): shuffling is normally unnecessary for evaluation; it
        # is kept because it consumes torch RNG state and removing it would
        # change the seeded sequence of training batches.
        test_dataloader = DataLoader(dataset=test_dataset,
                                     batch_size=cfg.batch_size,
                                     shuffle=True)

        # Input width is the second dimension of the encoded training tensor.
        model = FFNN(size_vocabulary=train_tensors.shape[1],
                     num_hidden_layers=cfg.num_hidden_layers)

        loss_function = nn.CrossEntropyLoss()
        optimiser = torch.optim.Adam(model.parameters(),
                                     lr=cfg.learning_rate)

        # One training pass followed by one evaluation pass per epoch.
        for _ in range(cfg.num_epochs):
            train(train_dataloader, model, optimiser, loss_function)
            test(test_dataloader, model, loss_function)




# Initialise the sweep. NB: skip this cell if the sweep has already been created.

In [7]:
# Register the sweep described by `config` with W&B and keep the ID it returns.
sweep_id = wandb.sweep(
    config,
    entity="jespermagnus",
    project="Epochs and You",
)

Create sweep with ID: ap66cvum
Sweep URL: https://wandb.ai/jespermagnus/Epochs%20and%20You/sweeps/ap66cvum


# Run a sweep agent. Optionally pass `count` to limit the number of runs. If the sweep already exists, copy its ID from W&B.

In [3]:
# Start an agent that calls `sweep` for each run of the existing sweep
# "ap66cvum" (ID copied from W&B).
wandb.agent(
    "ap66cvum",
    function=sweep,
    entity="jespermagnus",
    project="Epochs and You",
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Agent Starting Run: l7djtmok with config:
[34m[1mwandb[0m: 	batch_size: 76
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	learning_rate: 0.009573007289181892
[34m[1mwandb[0m: 	nrows: 606
[34m[1mwandb[0m: 	num_epochs: 44
[34m[1mwandb[0m: 	num_hidden_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mj-soberg[0m ([33mjespermagnus[0m). Use [1m`wandb login --relogin`[0m to force relogin


0,1
Accuracy,▇▄▃▁▄▃▂▁▄▄▂▄▄▄▄▆▄▅▄▅▅▅▄▅▅▄▆▅▆▆▆▆▇▅▇▇▆▆█▆
Macro-f1-score,█▄▂▂▄▅▃▂▄▂▁▂▂▂▂▄▁▃▃▄▄▂▂▄▄▂▄▄▄▄▅▅▅▄▅▅▅▆▅▅
Precision,▆▄▁▂▇▅▄▂▄▂▁▂▂▂▂█▁▃▂▂▆▁▁▅▃▁▂▃▃▂▃▃▃▂▃▃▄▅▅▃
Recall,▇▄▃▁▄▃▂▁▄▄▂▄▄▄▄▆▄▅▄▆▆▅▄▆▅▄▆▆▆▆▆▆▇▅▆▇▆▆█▆
Test loss,▁▂▄▅▂▂▄▆▃▂▄▃▃▄▅▃▄▆█▇▄▅▅▆▆▆▅▆▅▆▄▃▃▄▄▄▃▃▃▅
Training loss,▁▅▄▅▆▅▄▅▆▅▄▂▂▃▂▄▃▃▇▅▆▆▅▇▅▆█▄▅▃▄▅▄▂▂▆▄▆▅▆

0,1
Accuracy,0.09571
Macro-f1-score,0.0339
Precision,0.08599
Recall,0.10613
Test loss,2.30103
Training loss,2.30506


[34m[1mwandb[0m: Agent Starting Run: lpm960r3 with config:
[34m[1mwandb[0m: 	batch_size: 131
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	learning_rate: 0.004580829720168187
[34m[1mwandb[0m: 	nrows: 902
[34m[1mwandb[0m: 	num_epochs: 47
[34m[1mwandb[0m: 	num_hidden_layers: 5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


: 