# Define the configuration

In [2]:
# W&B sweep definition: random search over the hyperparameters below,
# ranking runs by the logged "Macro-f1-score" metric (higher is better).
config = dict(
    method="random",
    metric=dict(goal="maximize", name="Macro-f1-score"),
    parameters=dict(
        # Forwarded to the FFNN constructor.
        num_hidden_layers=dict(distribution="int_uniform", max=5, min=1),
        # Row limit passed to getDataFrameFromData for both the train and test CSV.
        nrows=dict(distribution="int_uniform", max=1000, min=100),
        # Used for both the training and the test DataLoader.
        batch_size=dict(distribution="int_uniform", max=200, min=50),
        # Selects the bag-of-words representation.
        bow=dict(distribution="categorical", values=["tf_idf", "countVector"]),
        # Learning rate handed to the Adam optimiser.
        learning_rate=dict(distribution="uniform", max=0.01, min=0.0001),
        # Number of train/test passes per run.
        num_epochs=dict(distribution="int_uniform", max=60, min=5),
    ),
)

# Define the function to run

In [3]:
import wandb
import torch
import torch.nn as nn
import numpy as np

from torch.utils.data import DataLoader

from dataSet import CustomDataDataSet
from ffnn import FFNN
from Preprocessing import getDataFrameFromData
from BoW import TF_IDF, getCountVector
from networkFunctions import train, test

def sweep(config=None):
    """Execute a single sweep run: load data, vectorise it, then train and evaluate an FFNN.

    The hyperparameters are read from ``wandb.config`` after the run has been
    initialised, so the function can be handed directly to ``wandb.agent``.

    Args:
        config: Optional configuration forwarded to ``wandb.init``.

    Raises:
        ValueError: If ``bow`` in the run configuration is neither ``"tf_idf"``
            nor ``"countVector"``. Raised inside the ``wandb.init`` context, before
            any data is loaded, instead of silently ending the run without results.
    """
    # Re-seed on every call so each run starts from the same RNG state.
    torch.manual_seed(888)
    np.random.seed(888)

    # Supported bag-of-words representations. Both helpers are called the same
    # way: fn(df) -> (tensors, vocabulary) and fn(df, vocabulary=...) for reuse.
    vectorisers = {
        "tf_idf": TF_IDF,
        "countVector": getCountVector,
    }

    with wandb.init(config=config):
        run_config = wandb.config

        vectorise = vectorisers.get(run_config.bow)
        if vectorise is None:
            raise ValueError(
                f"Unsupported bow value {run_config.bow!r}; "
                f"expected one of {sorted(vectorisers)}"
            )

        # The same row limit is applied to the training and the test file.
        train_df, train_labels = getDataFrameFromData("Archive/arxiv_train.csv", nrows=run_config.nrows)
        test_df, test_labels = getDataFrameFromData("Archive/arxiv_test.csv", nrows=run_config.nrows)

        # Build the vocabulary on the training data and reuse it for the test
        # data so both tensors share the same feature columns.
        train_tensors, vocabulary = vectorise(train_df)
        test_tensors, _ = vectorise(test_df, vocabulary=vocabulary)

        train_dataloader = DataLoader(dataset=CustomDataDataSet(train_tensors, train_labels),
                                      batch_size=run_config.batch_size,
                                      shuffle=True)
        # No shuffling for evaluation: every epoch is scored on the same
        # batches in the same order.
        test_dataloader = DataLoader(dataset=CustomDataDataSet(test_tensors, test_labels),
                                     batch_size=run_config.batch_size,
                                     shuffle=False)

        # Input width equals the number of feature columns of the training tensor.
        model = FFNN(size_vocabulary=train_tensors.shape[1],
                     num_hidden_layers=run_config.num_hidden_layers)
        loss_function = nn.CrossEntropyLoss()
        optimiser = torch.optim.Adam(model.parameters(),
                                     lr=run_config.learning_rate)

        # One training pass followed by one evaluation pass per epoch.
        # NOTE(review): metric logging presumably happens inside train()/test();
        # it is not visible in this cell.
        for _ in range(run_config.num_epochs):
            train(train_dataloader, model, optimiser, loss_function)
            test(test_dataloader, model, loss_function)




# Initialise the sweep. NB! Skip this cell if the sweep has already been created

In [4]:
#sweep_id = wandb.sweep(config, entity='jespermagnus', project="Epochs and You")

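# Run a sweep agent; the optional `count` argument limits how many runs it executes. If the sweep already exists, copy its ID from W&B. Note that an existing sweep keeps the configuration it was created with, so the sampled values (e.g. `nrows` below 100 in the output below) may not match the `config` cell above.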

In [7]:
# Attach an agent to the existing sweep and let it call `sweep` for at most three runs.
wandb.agent(
    sweep_id='xuxpyp1z',
    function=sweep,
    entity='jespermagnus',
    project="Epochs and You",
    count=3,
)

[34m[1mwandb[0m: Agent Starting Run: 0xd762k9 with config:
[34m[1mwandb[0m: 	batch_size: 145
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	learning_rate: 0.006575719931407151
[34m[1mwandb[0m: 	nrows: 97
[34m[1mwandb[0m: 	num_epochs: 41
[34m[1mwandb[0m: 	num_hidden_layers: 3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mm-tysdal[0m ([33mjespermagnus[0m). Use [1m`wandb login --relogin`[0m to force relogin


0,1
Accuracy,▇█▇▅▅▄▃▃▂▃▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Macro-f1-score,███▆▇▅▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Precision,▅▅▅▅█▄▂▂▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁
Recall,██▇▆▆▅▃▃▂▂▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
Test loss,▇▇▃▁▁▃▇██▇▆▆██████████████████████▆▆▆▆▆▆
Training loss,██▇▂▂▁▃▄▅▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇

0,1
Accuracy,0.07216
Macro-f1-score,0.01458
Precision,0.00787
Recall,0.1
Test loss,2.29591
Training loss,2.28602


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: thdxr4fr with config:
[34m[1mwandb[0m: 	batch_size: 167
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	learning_rate: 0.008177757349451559
[34m[1mwandb[0m: 	nrows: 96
[34m[1mwandb[0m: 	num_epochs: 39
[34m[1mwandb[0m: 	num_hidden_layers: 3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,██▅▅▂▂▃▂▂▂▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Macro-f1-score,█▇▅▅▂▂▂▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Precision,▇▆▇█▄▂▁▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Recall,██▅▅▂▂▂▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Test loss,▇▇▃▁▇▆█████████████████████████████████
Training loss,██▇▂▁▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇

0,1
Accuracy,0.07292
Macro-f1-score,0.01373
Precision,0.00737
Recall,0.1
Test loss,2.305
Training loss,2.29458


[34m[1mwandb[0m: Agent Starting Run: 3r6uilj7 with config:
[34m[1mwandb[0m: 	batch_size: 50
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	learning_rate: 0.003868337255799407
[34m[1mwandb[0m: 	nrows: 100
[34m[1mwandb[0m: 	num_epochs: 38
[34m[1mwandb[0m: 	num_hidden_layers: 4
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
Accuracy,█▄▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Macro-f1-score,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Precision,█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Recall,█▄▃▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
Test loss,█▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁██▁▁▁▁▁▁▁▁▁▁▁▁█▁
Training loss,▇▁█▇▇▆▆▆▆▆▆▆▆▁▆▆▆▆▆▆▆▆▆▆▆▆▆▁▆▆▆▆▆▆▆▆▆▆

0,1
Accuracy,0.07
Macro-f1-score,0.01333
Precision,0.00714
Recall,0.1
Test loss,2.28669
Training loss,2.29669
