# Running Sweeps 🚀

In [1]:
import os
# Switch the working directory to ../src before the project imports in the
# next cell -- presumably so the local packages (managers, ingestion, utils)
# resolve from the working directory; confirm against the repo layout.
# NOTE: the path is relative to the current working directory, so re-running
# this cell after a later chdir will land somewhere else (or fail).
os.chdir('../src')

In [2]:
import wandb
import yaml
from managers.trainer import Trainer
from ingestion.dataloader import SCDataLoader
from utils import create_model
from typing import Dict, Any

Using backend: pytorch


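Load the sweep config file (`config/sweep_config.yml`). Only this file is read here; the normal training config is not loaded separately, so the sweep file has to define every parameter a run needs.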

In [3]:
# The first cell moved the working directory into src/ for the imports; step
# back up to the parent directory (where config/ is expected) exactly once.
# The guard keeps this cell idempotent: an unconditional os.chdir('..') would
# climb one directory higher on every re-run and break the relative path below.
if os.path.basename(os.getcwd()) == 'src':
    os.chdir('..')

# Read the sweep parameter definitions; safe_load is used so the YAML cannot
# construct arbitrary Python objects.
with open('config/sweep_config.yml', 'r') as yaml_file:
    sweep_config_file = yaml.safe_load(yaml_file)

Add the search method, the parameters, and the metric to optimise to the `sweep_config` dictionary.

In [4]:
# Top-level sweep definition: search method 'bayes' over the parameters that
# were read from the sweep config file.
sweep_config = dict(method='bayes', parameters=sweep_config_file)

In [5]:
# Objective the sweep optimises. AUC is a higher-is-better score, so the goal
# must be 'maximize'; with 'minimize' the Bayesian search would be steered
# towards the worst-performing configurations.
metric = {
    'name': 'Training AUC buys_from',
    'goal': 'maximize'
}
sweep_config['metric'] = metric

Register the sweep under the `KG-Completion-Sweep` project and keep the returned sweep ID.

In [6]:
# Register the sweep definition with W&B; the returned ID is what the agent
# cell uses to pull runs for this sweep.
sweep_id = wandb.sweep(
    sweep=sweep_config,
    project="KG-Completion-Sweep",
)

Create sweep with ID: flp6ncui
Sweep URL: https://wandb.ai/grandintegrator/KG-Completion-Sweep/sweeps/flp6ncui


In [7]:
def train(config=None):
    """Run a single training trial inside a Weights & Biases run.

    Opens a W&B run, reads the run's parameters back from ``wandb.config``,
    then builds the data loader, the model and the trainer from those
    parameters and starts training.

    Args:
        config: Passed straight through to ``wandb.init``. Defaults to None
            (presumably the sweep agent supplies the values in that case --
            confirm against the W&B docs).

    Returns:
        None.
    """
    # The run is used as a context manager so it is closed when training ends.
    with wandb.init(config=config):
        # Everything downstream is parameterised by the run's own config
        # object rather than by the argument that was passed in.
        params = wandb.config

        loader = SCDataLoader(params=params)
        batches = loader.get_training_dataloader()

        # The model is created with the edge types exposed by the data loader.
        model = create_model(params=params,
                             graph_edge_types=loader.edge_types)

        Trainer(params=params, model=model, train_data_loader=batches).train()

In [None]:
# Start a sweep agent in this kernel: it calls train() once per trial and
# stops after 20 runs.
wandb.agent(
    sweep_id,
    function=train,
    count=20,
)

INFO:wandb.agents.pyagent:Starting sweep agent: entity=None, project=None, count=20
[34m[1mwandb[0m: Agent Starting Run: u00mar9g with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	capability_product_weight_cut: 200
[34m[1mwandb[0m: 	cg_weight_cut: 30
[34m[1mwandb[0m: 	device: cpu
[34m[1mwandb[0m: 	eval_type: validation
[34m[1mwandb[0m: 	evaluate_every: 3
[34m[1mwandb[0m: 	from_scratch: False
[34m[1mwandb[0m: 	graph_save_path: data/02_intermediate/
[34m[1mwandb[0m: 	l2_regularisation: 0.05
[34m[1mwandb[0m: 	load_graph: True
[34m[1mwandb[0m: 	log_company_accuracy: False
[34m[1mwandb[0m: 	log_freq: 20
[34m[1mwandb[0m: 	loss: margin
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	momentum: 0.005
[34m[1mwandb[0m: 	num_classes: 2
[34m[1mwandb[0m: 	num_epochs: 1000
[34m[1mwandb[0m: 	num_hidden_graph_layers: 100
[34m[1mwandb[0m: 	num_negative_samples: 3
[34m[1mwandb[0m: 	num_node_features: 20
[34m[1mwandb[0m: 	num_wor

INFO:ingestion.dataset:Graphs loaded locally with the following dimensions:
INFO:ingestion.dataset:cG should have 14018 edges
INFO:ingestion.dataset:bG should have 119618 edges
INFO:ingestion.dataset:G should have 88997 edges
INFO:ingestion.dataset:capability_product_graph should have 21575 edges
INFO:ingestion.dataset:company_capability_graph should have 83787 edges
INFO:ingestion.dataset:36 Capabilities
INFO:ingestion.dataset:119618 Processes
INFO:ingestion.dataset:41826 Companies
INFO:ingestion.dgl_dataset:All graphs loaded to memory - moving to process...
INFO:ingestion.dgl_dataset:Triplets loaded from memory, processing to torch...
  7%|▋         | 1000/14176 [07:18<1:36:14,  2.28it/s, loss=0.205]


VBox(children=(Label(value=' 0.01MB of 0.01MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,979.0
Training loss,0.39139
Training AUC capability_produces,1.0
Training AUC buys_from,0.95625
Training AUC has_capability,0.45
Training AUC has_cert,0.66111
Training AUC located_in,0.7
Training AUC makes_product,1.0
Training AUC complimentary_product_to,0.99467
Training AP capability_produces,1.0


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
Training loss,▄█▂▂▃▂▂▂▂▂▂▁▂▂▂▁▂▃▂▂▂▁▂▁▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂
Training AUC capability_produces,████████████████████▁████
Training AUC buys_from,▃▁▄▃▆▂▁▇▄▇▅█▅▇▂▆▅▅█▄▁▃▇▁▇▁▅▄▁▅▅▄▅█▆▃▇▄▇
Training AUC has_capability,▄▄▆▄▃▇▄▄▄▂▇▅▆▁▄▇▄▇▄▄▅█▄▅▅▇▃▃▃▄▄▇▃▄▇▃▆▁▁
Training AUC has_cert,▃▅▄▇▅▆█▆▇▄▅█▆▇▅█▅▂▆▄█▇█▇▇█▄▄▆▇▇▇▇▇▅▆▁▃▅
Training AUC located_in,▅▁█▇▄▇▅▅▆▆▂█▇█▆▇▆▅█▇▆▇▇▇▆███▅▇▇▇▄▇▆▇▆▇▅
Training AUC makes_product,█▅▆▁▂█▆▇█▆▆██▆█████▆▇█████▇██▆▄▇██▇████
Training AUC complimentary_product_to,█▂█▇█▇█▇██▄█▆▆█▆▅▁██▇▇▅▆██▇▇▇██▆▅█▇█▆▆▆
Training AP capability_produces,████████████████████▁████


[34m[1mwandb[0m: Agent Starting Run: pwzjl087 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	capability_product_weight_cut: 200
[34m[1mwandb[0m: 	cg_weight_cut: 30
[34m[1mwandb[0m: 	device: cpu
[34m[1mwandb[0m: 	eval_type: validation
[34m[1mwandb[0m: 	evaluate_every: 3
[34m[1mwandb[0m: 	from_scratch: False
[34m[1mwandb[0m: 	graph_save_path: data/02_intermediate/
[34m[1mwandb[0m: 	l2_regularisation: 0.0005
[34m[1mwandb[0m: 	load_graph: True
[34m[1mwandb[0m: 	log_company_accuracy: False
[34m[1mwandb[0m: 	log_freq: 20
[34m[1mwandb[0m: 	loss: margin
[34m[1mwandb[0m: 	lr: 1e-05
[34m[1mwandb[0m: 	momentum: 0.05
[34m[1mwandb[0m: 	num_classes: 2
[34m[1mwandb[0m: 	num_epochs: 2000
[34m[1mwandb[0m: 	num_hidden_graph_layers: 1000
[34m[1mwandb[0m: 	num_negative_samples: 3
[34m[1mwandb[0m: 	num_node_features: 50
[34m[1mwandb[0m: 	num_workers: 0
[34m[1mwandb[0m: 	optimiser: Adam
[34m[1mwandb[0m: 	save_train_resu

INFO:ingestion.dataset:Graphs loaded locally with the following dimensions:
INFO:ingestion.dataset:cG should have 14018 edges
INFO:ingestion.dataset:bG should have 119618 edges
INFO:ingestion.dataset:G should have 88997 edges
INFO:ingestion.dataset:capability_product_graph should have 21575 edges
INFO:ingestion.dataset:company_capability_graph should have 83787 edges
INFO:ingestion.dataset:36 Capabilities
INFO:ingestion.dataset:119618 Processes
INFO:ingestion.dataset:41826 Companies
INFO:ingestion.dgl_dataset:All graphs loaded to memory - moving to process...
INFO:ingestion.dgl_dataset:Triplets loaded from memory, processing to torch...
 22%|██▏       | 3142/14176 [1:26:27<5:39:13,  1.84s/it, loss=0.518]