# Define the configuration

In [3]:
config = {
    "method": "random",
    "metric": {
        "goal": "maximize",
        "name": "Macro-f1-score"
    },
    "parameters": {
        "batch_size": {
            "distribution": "int_uniform",
            "max": 112,
            "min": 13
        },
        "bow": {
            "distribution": "categorical",
            "values": ["tf_idf", "countVector"]
        },
        "hidden_size": {
            "distribution": "int_uniform",
            "max": 208,
            "min": 25
        },
        "learning_rate": {
            "distribution": "uniform",
            "max": 0.0030063894350760606,
            "min": 0.0004767494294738798
        },
        "network": {
            "distribution": "categorical",
            "values": ["rnn", "ffnn"]
        },
        "nrows": {
            "distribution": "int_uniform",
            "max": 324,
            "min": 35
        },
        "num_epochs": {
            "distribution": "int_uniform",
            "max": 90,
            "min": 15
        },
        "num_layers": {
            "distribution": "int_uniform",
            "max": 2,
            "min": 1
        }
    }
}

# Define the function to run

In [4]:
import wandb
import torch
import torch.nn as nn
import numpy as np

from torch.utils.data import DataLoader

from dataSet import CustomDataDataSet
from rnn import RNN
from ffnn import FFNN
from Preprocessing import getDataFrameFromData
from BoW import TF_IDF, getCountVector
from networkFunctions import train, test

def sweep(config=None):
    torch.manual_seed(888)
    np.random.seed(888)
    
    with wandb.init(config=config):

        config = wandb.config

        train_df, train_labels = getDataFrameFromData("Archive/arxiv_train.csv", nrows=config.nrows)
        test_df, test_labels = getDataFrameFromData("Archive/arxiv_test.csv", nrows=config.nrows)

        if config.bow == "tf_idf":
            train_tensors, vocabulary = TF_IDF(train_df)
            test_tensors, _ = TF_IDF(test_df, vocabulary=vocabulary)
        elif config.bow == "countVector":
            train_tensors, vocabulary = getCountVector(train_df)
            test_tensors, _ = getCountVector(test_df, vocabulary=vocabulary)
        else:
            return
        
        train_dataset = CustomDataDataSet(train_tensors, train_labels)
        test_dataset = CustomDataDataSet(test_tensors, test_labels)
        train_dataloader = DataLoader(dataset=train_dataset,
                                    batch_size=config.batch_size,
                                    shuffle=True)
        test_dataloader = DataLoader(dataset=test_dataset,
                                    batch_size=config.batch_size,
                                    shuffle=True)
        
        if config.network == "rnn":
            print(train_tensors.shape[1])
            model = RNN(train_tensors.shape[1],
                        config.hidden_size,
                        config.num_layers)
        elif config.network == "ffnn":
            model = FFNN(train_tensors.shape[1])
        else:
            return
            
        loss_function = nn.CrossEntropyLoss()

        optimiser = torch.optim.Adam(model.parameters(),
                                        lr=config.learning_rate)
    
        for _ in range(config.num_epochs):
            train(train_dataloader, model, optimiser, loss_function)
            test(test_dataloader, model, loss_function)




# Inistialise the sweep. NB! Skip if already created

In [5]:
sweep_id = wandb.sweep(config, project="RNNs and You")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 48m0nanw
Sweep URL: https://wandb.ai/j-soberg/RNNs%20and%20You/sweeps/48m0nanw


# Run a sweep agent, using count to specify how many runs (optional). If run already exists, then copy the id from W&B

In [6]:
wandb.agent(sweep_id, sweep, count=5)

[34m[1mwandb[0m: Agent Starting Run: ep99j5i2 with config:
[34m[1mwandb[0m: 	batch_size: 81
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	hidden_size: 177
[34m[1mwandb[0m: 	learning_rate: 0.0007613591049743388
[34m[1mwandb[0m: 	network: rnn
[34m[1mwandb[0m: 	nrows: 303
[34m[1mwandb[0m: 	num_epochs: 67
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mj-soberg[0m. Use [1m`wandb login --relogin`[0m to force relogin


7523
loss: 2.302272081375122  [81.0/303]
Test Error: 
 Accuracy: 0.2607260726072607, Precision: 0.21950741475623375, recall: 0.22659511241290944, f1: 0.22163785316112244, Avg loss: 2.2998823523521423 

loss: 2.296083688735962  [81.0/303]
Test Error: 
 Accuracy: 0.33993399339933994, Precision: 0.3055924740481237, recall: 0.31317978276537234, f1: 0.3059614485441516, Avg loss: 2.2977689504623413 

loss: 2.287703514099121  [81.0/303]
Test Error: 
 Accuracy: 0.41254125412541254, Precision: 0.38574626303512854, recall: 0.3887538481710807, f1: 0.38395404658526655, Avg loss: 2.294773817062378 

loss: 2.2745275497436523  [81.0/303]
Test Error: 
 Accuracy: 0.41914191419141916, Precision: 0.39929458119149563, recall: 0.405927135620583, f1: 0.3991755454421352, Avg loss: 2.2916170358657837 

loss: 2.2643446922302246  [81.0/303]
Test Error: 
 Accuracy: 0.4752475247524752, Precision: 0.45725638843859784, recall: 0.4566176004506546, f1: 0.450724586148315, Avg loss: 2.287721335887909 

loss: 2.25077652

0,1
Accuracy,▁▂▄▆▇▇▇██▇█▇████▇▇██▇▇▇▇▇██▇▇███▇▇▇▇▇▇█▇
Macro-f1-score,▁▂▄▆▇▇▇██▇█▇████▇▇███▇█▇▇██▇▇████▇▇▇▇▇██
Precision,▁▂▄▆▇▇▇█▇▇█████▇█▇██████████▇████▇▇▇█▇██
Recall,▁▂▄▆▇▇▇██▇█▇████▇▇███▇▇▇▇██▇▇████▇▇▇▇▇█▇

0,1
Accuracy,0.60726
Macro-f1-score,0.60231
Precision,0.61218
Recall,0.60493


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qpv4dws9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	hidden_size: 37
[34m[1mwandb[0m: 	learning_rate: 0.001635200503672175
[34m[1mwandb[0m: 	network: ffnn
[34m[1mwandb[0m: 	nrows: 71
[34m[1mwandb[0m: 	num_epochs: 90
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


loss: 2.30251407623291  [32.0/71]
Test Error: 
 Accuracy: 0.38028169014084506, Precision: 0.4133333333333334, recall: 0.33585497835497835, f1: 0.3220417082917083, Avg loss: 2.294532219568888 

loss: 2.1923317909240723  [32.0/71]
Test Error: 
 Accuracy: 0.4084507042253521, Precision: 0.3335714285714285, recall: 0.3448701298701299, f1: 0.331893348197696, Avg loss: 2.239779313405355 

loss: 2.0810163021087646  [32.0/71]
Test Error: 
 Accuracy: 0.39436619718309857, Precision: 0.3566269841269841, recall: 0.38350649350649346, f1: 0.3511255411255411, Avg loss: 2.263838052749634 

loss: 2.0337002277374268  [32.0/71]
Test Error: 
 Accuracy: 0.38028169014084506, Precision: 0.3980555555555555, recall: 0.38040043290043285, f1: 0.35980142664353193, Avg loss: 2.2087533473968506 

loss: 2.038227081298828  [32.0/71]
Test Error: 
 Accuracy: 0.352112676056338, Precision: 0.37426739926739927, recall: 0.35388528138528136, f1: 0.3328105228105228, Avg loss: 2.2270348072052 

loss: 2.033398151397705  [32.0/7

0,1
Accuracy,██▇▅█▅▇███▇▇▅█▇▅▅▅▇▅▅▄▄▄▅▄▄▅▄▄▄▄▃▄▄▂▂▂▄▁
Macro-f1-score,▇▇▇▆█▅▇███▇▇▆██▅▅▅▇▆▅▄▅▄▅▅▄▅▄▄▄▄▃▄▃▃▂▂▄▁
Precision,█▆▇▆█▆▇▇▇██▇▆▇█▆▆▆▇▆▅▅▆▄▅▆▅▅▄▅▅▆▃▅▆▄▅▃▅▁
Recall,▆▇▆▅▇▄▆▇█▇▆▇▆▇▇▅▅▄▆▆▅▄▄▅▅▅▅▅▅▅▅▄▃▄▄▃▂▂▄▁

0,1
Accuracy,0.12676
Macro-f1-score,0.10889
Precision,0.11255
Recall,0.14394


[34m[1mwandb[0m: Agent Starting Run: 1k21e5iq with config:
[34m[1mwandb[0m: 	batch_size: 73
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	hidden_size: 185
[34m[1mwandb[0m: 	learning_rate: 0.0022132657615738637
[34m[1mwandb[0m: 	network: rnn
[34m[1mwandb[0m: 	nrows: 324
[34m[1mwandb[0m: 	num_epochs: 59
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


7847
loss: 2.302605628967285  [73.0/324]
Test Error: 
 Accuracy: 0.4104938271604938, Precision: 0.3979120939856863, recall: 0.40185134813435386, f1: 0.3898200670640054, Avg loss: 2.301769828796387 

loss: 2.3007869720458984  [73.0/324]
Test Error: 
 Accuracy: 0.5432098765432098, Precision: 0.5185578706262384, recall: 0.5395302727578475, f1: 0.5085662386459489, Avg loss: 2.3008939266204833 

loss: 2.2981488704681396  [73.0/324]
Test Error: 
 Accuracy: 0.5833333333333334, Precision: 0.5572945363460069, recall: 0.5718552593198113, f1: 0.5527107772660034, Avg loss: 2.2996124267578124 

loss: 2.2939987182617188  [73.0/324]
Test Error: 
 Accuracy: 0.6172839506172839, Precision: 0.6163535100875255, recall: 0.6197591126038822, f1: 0.5937830865010316, Avg loss: 2.2979050636291505 

loss: 2.2883946895599365  [73.0/324]
Test Error: 
 Accuracy: 0.6049382716049383, Precision: 0.5842202681209719, recall: 0.5995165374928861, f1: 0.5710520494026484, Avg loss: 2.2951509952545166 

loss: 2.2800338268280

0,1
Accuracy,▁▄▆▆▅▅▄▅▆▇█▇█▆▇▇▇███▆▇▆▇█▇▇▇▇▇▇█▇▇▇▇█▇▇▇
Macro-f1-score,▁▄▅▅▄▄▄▅▆▇█▇▇▆▇▇▇▇██▆▇▇▇█▇▇▇▇▇▇█▇▇▇▇█▇▇▇
Precision,▁▄▅▆▄▄▄▅▆▇▇▇▇▆▇▇▇▇██▇▇▇██▇▇█▇█▇█▇▇▇▇█▇▇▇
Recall,▁▄▅▆▅▄▄▅▆▇█▇▇▆▇▇▇▇██▆▇▇▇█▇▆▇▇▇▇█▇▇▇▇█▇▇▇

0,1
Accuracy,0.62963
Macro-f1-score,0.62757
Precision,0.65568
Recall,0.63574


[34m[1mwandb[0m: Agent Starting Run: lstl8tp5 with config:
[34m[1mwandb[0m: 	batch_size: 109
[34m[1mwandb[0m: 	bow: tf_idf
[34m[1mwandb[0m: 	hidden_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.0005115966692873987
[34m[1mwandb[0m: 	network: rnn
[34m[1mwandb[0m: 	nrows: 260
[34m[1mwandb[0m: 	num_epochs: 51
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


6842
loss: 2.302616596221924  [109.0/260]
Test Error: 
 Accuracy: 0.08461538461538462, Precision: 0.08704895866660572, recall: 0.09232142911563264, f1: 0.08734199391965725, Avg loss: 2.3023858865102134 

loss: 2.3026037216186523  [109.0/260]
Test Error: 
 Accuracy: 0.13076923076923078, Precision: 0.13523842391362675, recall: 0.13936943086764153, f1: 0.13053669994178266, Avg loss: 2.302467664082845 

loss: 2.3025028705596924  [109.0/260]
Test Error: 
 Accuracy: 0.13846153846153847, Precision: 0.147563915553046, recall: 0.14571059005631812, f1: 0.14102909758385077, Avg loss: 2.3024418354034424 

loss: 2.3024191856384277  [109.0/260]
Test Error: 
 Accuracy: 0.12692307692307692, Precision: 0.13728865687689212, recall: 0.13609115734842964, f1: 0.13209383814205386, Avg loss: 2.3024868965148926 

loss: 2.3022987842559814  [109.0/260]
Test Error: 
 Accuracy: 0.13846153846153847, Precision: 0.1451169472023534, recall: 0.14651028468315802, f1: 0.1388062683883132, Avg loss: 2.3023505210876465 

l

0,1
Accuracy,▁▂▂▂▂▂▂▂▂▂▂▃▄▃▄▄▄▃▄▄▄▄▄▅▅▆▅▆▆▆▆▆▆▆▆▇▇▆▇█
Macro-f1-score,▁▂▂▂▂▂▂▂▂▂▂▃▄▃▄▄▄▄▄▄▅▄▄▆▅▇▅▆▆▆▆▆▇▇▆▇▇▇▇█
Precision,▁▂▂▂▂▂▂▂▂▂▂▄▄▃▄▄▄▄▄▄▅▄▄▆▅▇▅▆▆▆▆▆▇▇▆▇▇▇▇█
Recall,▁▂▂▂▂▂▂▂▂▂▂▃▄▃▄▄▄▄▄▄▄▄▄▅▅▆▅▆▆▆▆▆▆▇▆▇▇▇▇█

0,1
Accuracy,0.46154
Macro-f1-score,0.43905
Precision,0.45558
Recall,0.46298


[34m[1mwandb[0m: Agent Starting Run: fplop9lx with config:
[34m[1mwandb[0m: 	batch_size: 71
[34m[1mwandb[0m: 	bow: countVector
[34m[1mwandb[0m: 	hidden_size: 40
[34m[1mwandb[0m: 	learning_rate: 0.000999363790360081
[34m[1mwandb[0m: 	network: rnn
[34m[1mwandb[0m: 	nrows: 140
[34m[1mwandb[0m: 	num_epochs: 19
[34m[1mwandb[0m: 	num_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


4646
loss: 2.3027725219726562  [71.0/140]
Test Error: 
 Accuracy: 0.17142857142857143, Precision: 0.16797438672438672, recall: 0.16837492856674444, f1: 0.16053138967185615, Avg loss: 2.300985336303711 

loss: 2.3009331226348877  [71.0/140]
Test Error: 
 Accuracy: 0.18571428571428572, Precision: 0.18852540433422788, recall: 0.1892357344275503, f1: 0.18375636044326746, Avg loss: 2.3008843660354614 

loss: 2.2994914054870605  [71.0/140]
Test Error: 
 Accuracy: 0.18571428571428572, Precision: 0.18828208556149734, recall: 0.1690812980710679, f1: 0.1744522937881572, Avg loss: 2.3007102012634277 

loss: 2.2957100868225098  [71.0/140]
Test Error: 
 Accuracy: 0.22142857142857142, Precision: 0.21951479076479075, recall: 0.21363258012234992, f1: 0.21255570321898923, Avg loss: 2.3002768754959106 

loss: 2.2932662963867188  [71.0/140]
Test Error: 
 Accuracy: 0.19285714285714287, Precision: 0.18537464985994398, recall: 0.18772275465370095, f1: 0.18032446646298636, Avg loss: 2.299942970275879 

loss:

0,1
Accuracy,▁▁▁▂▂▂▂▃▄▄▃▅▅▆▅▆▇█▆
Macro-f1-score,▁▂▁▃▂▂▂▃▄▄▂▅▄▆▅▆▆█▆
Precision,▁▂▂▃▂▃▂▃▄▄▂▅▄▆▅▆▆█▆
Recall,▁▂▁▂▂▂▂▃▄▄▂▄▄▆▅▆▆█▆

0,1
Accuracy,0.36429
Macro-f1-score,0.33144
Precision,0.34011
Recall,0.33644
