<a href="https://colab.research.google.com/github/EAkeweje/Accelerating-Hydrogen-Oxidation-Calculations/blob/main/Model%20Optimization_RNN%20(Exclusion%20of%20H2%20and%20O2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install wandb -Uq

[K     |████████████████████████████████| 1.8 MB 16.8 MB/s 
[K     |████████████████████████████████| 181 kB 61.2 MB/s 
[K     |████████████████████████████████| 158 kB 69.6 MB/s 
[K     |████████████████████████████████| 63 kB 1.9 MB/s 
[K     |████████████████████████████████| 157 kB 75.0 MB/s 
[K     |████████████████████████████████| 157 kB 74.8 MB/s 
[K     |████████████████████████████████| 157 kB 67.6 MB/s 
[K     |████████████████████████████████| 157 kB 75.2 MB/s 
[K     |████████████████████████████████| 157 kB 77.3 MB/s 
[K     |████████████████████████████████| 157 kB 61.5 MB/s 
[K     |████████████████████████████████| 157 kB 76.2 MB/s 
[K     |████████████████████████████████| 156 kB 49.5 MB/s 
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [None]:
# Import the Weights & Biases client and authenticate this session
# (wandb.login() may prompt for an API key, as in the recorded output below).
import wandb

wandb.login()

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 

··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

import torch
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

import PIL
import pandas as pd
from tqdm.notebook import tqdm
import json
import time
import cv2
import random
import glob
import os
import copy
from sklearn.metrics import r2_score

## Utils

In [None]:
#Dataset object
class OxidationDataset(Dataset):
    """Pairs each row of the input table with its per-sample output file.

    Sample ``idx`` reads its target from ``<outputs_dir>/out<idx>.npy``.
    """

    def __init__(self, inputs_path, outputs_dir, nsample, ntimesteps):
        """
        Args:
            inputs_path (string): path of the ``.npy`` file holding the input table.
            outputs_dir (string): directory containing the ``out<idx>.npy`` files.
            nsample (int): only the first ``nsample`` rows of the input table are kept.
            ntimesteps (int, list or 'all'): row selection applied to every
                output file (first n rows, explicit row indices, or every row).
        """
        with open(inputs_path, 'rb') as handle:
            raw_inputs = np.load(handle)

        # Columns 0, 3, 8 and 9 are removed (H2, O2 and the inert gases according
        # to the original author's note); a singleton axis is then inserted so a
        # sample has shape (1, n_features).
        kept_columns = np.delete(raw_inputs[:nsample, :], [0, 3, 8, 9], 1)
        self.inputs = kept_columns[:, np.newaxis]
        self.out_dir = outputs_dir
        self.ntimesteps = ntimesteps

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        target_file = os.path.join(self.out_dir, f'out{idx}.npy')
        with open(target_file, 'rb') as handle:
            table = np.load(handle)

        # Translate the ntimesteps setting into a row selector.
        steps = self.ntimesteps
        if type(steps) == int:
            rows = slice(None, steps)
        elif type(steps) == list:
            rows = steps
        elif steps == 'all':
            rows = slice(None)
        else:
            raise ValueError("Invalid argument 'ntimestep' should an interger, a list or 'all' string")

        # The first and the last two columns of the output table are dropped ...
        table = table[rows, 1:-2]
        # ... followed by columns 1 and 4 of what remains (H2 and O2 per the
        # original comment).
        table = np.delete(table, [1, 4], axis = 1)

        return torch.from_numpy(self.inputs[idx]), torch.from_numpy(table)

In [None]:
#Dataset object
class OxidationDataset_C(Dataset):
  '''
  Dataset that feeds concentrations (rather than proportions) to the model.

  The input concentrations are obtained from the stored proportions with the
  gas equation (total amount = P / (R * T)). According to the original author
  these equal the concentrations at time t = 0 in the output files, which is
  why the 'all' setting skips the first row of every output file: the model
  does not learn to predict the output at t = 0.
  '''

  def __init__(self, inputs_path, outputs_dir, nsample, ntimesteps, exclude_h2_o2 = False):
    """
    Args:
        inputs_path (string): path of the ``.npy`` file holding the input table.
        outputs_dir (string): directory containing the ``out<idx>.npy`` files.
        nsample (int): only the first ``nsample`` rows of the input table are kept.
        ntimesteps (int, list or 'all'): a single row index, a list of row
            indices, or 'all' (every row except the first) of each output file.
        exclude_h2_o2 (bool): when True the H2 and O2 columns are removed from
            both the inputs and the targets.
    """
    with open(inputs_path, 'rb') as f:
      x = np.load(f)
    self.exclude = exclude_h2_o2
    # Columns 8 and 9 (inert gases per the original comment) are always dropped;
    # columns 0 and 3 (H2 and O2) are dropped as well when requested.
    if not self.exclude:
      self.props = np.delete(x[ :nsample, :], [8, 9], 1)
    else:
      self.props = np.delete(x[ :nsample, :], [0, 3, 8, 9], 1)
    self.out_dir = outputs_dir
    self.ntimesteps = ntimesteps

  def get_total_material(self, Pressure, Temperature):
    """Return Pressure / (R * Temperature) with R = 8.314462618."""
    R =  8.314462618
    return Pressure / (R * Temperature)

  def __len__(self):
    return len(self.props)

  def __getitem__(self, idx):
    # The last two stored columns are read as pressure and temperature.
    pres = self.props[idx, -2]
    temp = self.props[idx, -1]
    total_amount_all = self.get_total_material(pres, temp)
    # Scale the proportions to concentrations and re-attach pressure/temperature.
    concs = self.props[idx, :-2] * total_amount_all
    # Named model_input so the builtin `input` is not shadowed.
    model_input = np.concatenate((concs, self.props[idx, -2:]))

    with open(os.path.join(self.out_dir, f'out{idx}.npy'), 'rb') as f:
      Y = np.load(f)

    # Row selection; the first and the last two columns are always dropped.
    if type(self.ntimesteps) == int:
      # A single row: the result is 1-D (one value per remaining column).
      Y = Y[self.ntimesteps, 1:-2]
    elif type(self.ntimesteps) == list:
      Y = Y[self.ntimesteps, 1:-2]
    elif self.ntimesteps == 'all':
      Y = Y[1:, 1:-2]
    else:
      raise ValueError("Invalid argument 'ntimestep' should an interger, a list or 'all' string")

    if self.exclude:
      # axis=-1 (instead of axis=1) so the columns can also be removed from the
      # 1-D target produced by an integer ntimesteps; for 2-D targets it is the
      # same axis as before.
      Y = np.delete(Y, [1, 4], axis = -1)

    return torch.from_numpy(model_input[np.newaxis, :]), torch.from_numpy(Y)

In [None]:
#for data standardization
def standardize(tensor, dim = 0, mean = None, std = None) -> "tuple[torch.Tensor, torch.Tensor, torch.Tensor]":
    """Standardize ``tensor`` as ``(tensor - mean) / std``.

    Args:
        tensor: tensor to standardize; it is cast with ``.float()`` first.
        dim: dimension along which missing statistics are computed.
        mean: precomputed mean; computed from ``tensor`` along ``dim``
            (``keepdim=True``) when None.
        std: precomputed standard deviation; computed from ``tensor`` along
            ``dim`` (``keepdim=True``) when None.

    Returns:
        The tuple ``(standardized_tensor, mean, std)``. (The previous return
        annotation, ``torch.float64``, described neither the tuple nor the
        dtype produced by ``.float()``.)

    Note:
        No guard against a zero ``std`` is applied; such entries divide by zero.
    """
    tensor = tensor.float()

    if mean is None:
        mean = tensor.mean(dim = dim, keepdim = True)

    if std is None:
        std = tensor.std(dim = dim, keepdim = True)

    standard_tensor = (tensor - mean) / std
    return standard_tensor, mean, std

def inverse_standardize(tensor, dim, mean, std):
    """Undo a standardization: return ``tensor * std + mean``.

    ``tensor`` is cast with ``.float()`` first. ``dim`` is accepted for
    signature symmetry with ``standardize`` but is not used.
    """
    rescaled = tensor.float().mul(std)
    return rescaled.add(mean)

In [None]:
def load_mean_std(path, loader = None):
  '''
  Load the standardization statistics stored as JSON at `path`.

  path:: json file with the keys 'mean_x', 'std_x', 'mean_y' and 'std_y'
  loader:: data loader (preferably the train loader) used to compute and save
           the statistics when `path` does not exist yet

  Returns the tensors (mean_x, std_x, mean_y, std_y).
  Raises ValueError when the file is missing and no loader was supplied.
  '''
  if os.path.exists(path):
    with open(path, 'r') as f:
      mean_std_dict = json.load(f)
    return tuple(torch.tensor(mean_std_dict[key])
                 for key in ('mean_x', 'std_x', 'mean_y', 'std_y'))

  # Nothing cached: the statistics must be computed, which needs data.
  if loader is None:
    raise ValueError(f"'{path}' does not exist and no loader was given to compute the statistics from")
  return save_mean_std(loader, path, output = True)
  

In [None]:
def save_mean_std(loader, path, output = False):
  '''
  Compute the mean and standard deviation of everything `loader` yields and
  write them to `path` as JSON.

  loader:: iterable of (x, y) batches, preferably the train loader
  path:: a json path
  output:: bool: when True, also return (mean_x, std_x, mean_y, std_y)
  '''
  # Gather every batch so the statistics cover the whole set.
  x_batches, y_batches = [], []
  for batch_x, batch_y in loader:
      x_batches.append(batch_x)
      y_batches.append(batch_y)

  # Statistics are taken along dim 0 of the concatenated batches.
  x_stats = standardize(torch.cat(x_batches), 0)
  y_stats = standardize(torch.cat(y_batches), 0)
  train_x_mean, train_x_std = x_stats[1], x_stats[2]
  train_y_mean, train_y_std = y_stats[1], y_stats[2]

  # Persist as plain nested lists.
  mean_std_dict = {
      'mean_x' : train_x_mean.tolist(),
      'mean_y' : train_y_mean.tolist(),
      'std_x' : train_x_std.tolist(),
      'std_y' : train_y_std.tolist()
  }
  with open(path, 'w') as f:
    json.dump(mean_std_dict, f)

  if not output:
    return None
  return train_x_mean, train_x_std, train_y_mean, train_y_std

In [None]:
def train_step(model, optimizer, criterion, dataloader, mean_std, device, mb_coeff = 0):
  '''
  Run one training epoch and return the sum of the per-batch losses.

  mean_std:: [x_mean, x_std, y_mean, y_std] used to standardize each batch;
             a falsy value disables standardization.
  mb_coeff:: material balance weight. If mb_coeff == 0, material balance is
             not considered.
  '''
  model.train()
  epoch_loss = 0.0
  for batch_x, batch_y in dataloader:
    # Move the batch to the training device.
    batch_x = batch_x.to(device)
    batch_y = batch_y.to(device)

    if mean_std:
      batch_x = standardize(batch_x, 0, mean_std[0], mean_std[1])[0]
      batch_y = standardize(batch_y, 0, mean_std[2], mean_std[3])[0]

    prediction = model(batch_x.float())

    if mb_coeff != 0:
      # Penalize mismatches in the hydrogen and oxygen totals as well.
      h_true, h_pred, o_true, o_pred = total_material(batch_y, prediction)
      loss = criterion(prediction, batch_y.float())
      loss = loss + mb_coeff * criterion(h_pred, h_true)
      loss = loss + mb_coeff * criterion(o_pred, o_true)
    else:
      loss = criterion(prediction, batch_y.float())

    # Standard optimization step: reset, backpropagate, update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    epoch_loss += loss.item()
  return epoch_loss

def valid_step(model, criterion, dataloader, mean_std, device, mb_coeff = 0):
  '''
  Evaluate the model on every batch of `dataloader` and return the sum of the
  per-batch losses.

  mean_std:: [x_mean, x_std, y_mean, y_std] used to standardize each batch;
             a falsy value disables standardization.
  mb_coeff:: material balance weight. If mb_coeff == 0, material balance is
             not considered.
  '''
  valid_loss_ = 0.0
  model.eval()  # disables the dropout inside the RNN layers
  # No parameter is updated here, so autograd bookkeeping is switched off to
  # avoid building (and holding in memory) a graph for every batch.
  with torch.no_grad():
    for batch_input, target in dataloader:
      # Transfer data to the evaluation device
      batch_input, target = batch_input.to(device), target.to(device)
      # Standardize
      if mean_std:
        batch_input, _, _ = standardize(batch_input, 0, mean_std[0], mean_std[1])
        target, _, _ = standardize(target, 0, mean_std[2], mean_std[3])
      # Forward pass
      predict = model(batch_input.float())
      # Find the loss
      if mb_coeff != 0:
        # Material balance
        T_H_true, T_H_pred, T_O_true, T_O_pred = total_material(target, predict)
        loss = criterion(predict,target.float()) + mb_coeff * criterion(T_H_pred, T_H_true) + mb_coeff * criterion(T_O_pred, T_O_true)
      else:
        loss = criterion(predict,target.float())
      # Accumulate
      valid_loss_ += loss.item()
  return valid_loss_

In [None]:
def loss_plot(train_loss, valid_loss):
  '''
  Plot the training (red) and validation (blue) loss curves against the epoch
  index on a logarithmic y axis.
  '''
  _, ax = plt.subplots(figsize = (10,7))
  curves = (
      (train_loss, '-r', 'Training loss'),
      (valid_loss, '-b', 'Validation loss'),
  )
  # Each curve gets its own x range, so the two histories may differ in length.
  for history, line_style, legend_label in curves:
    ax.plot(range(len(history)), history, line_style, label = legend_label)
  ax.set_yscale('log')
  ax.legend(fontsize=15)
  ax.grid()
  ax.set_xlabel('epochs', fontsize=15)
  ax.set_ylabel('MSE', fontsize=15)
  plt.show()
# plt.title(f'Loss curve (2ks {len(dataset.guage_space)}sensor)', fontsize = 20)}

In [None]:
def total_material(y_true, y_pred):
  '''
  Computes the total material (moles) of Hydrogen and Oxygen in product.

  y_true: ground truth
  y_pred: (model) prediction

  Both arguments must have the same shape, with 1 to 3 dimensions; the last
  axis is indexed at positions 1..8, so it needs at least 9 entries.
  NOTE(review): the targets with H2 and O2 excluded have two columns fewer, so
  these indices presumably do not apply to them - confirm before using a
  non-zero mb_coeff with that setting.

  Returns (T_H_true, T_H_pred, T_O_true, T_O_pred), each with the last axis
  reduced away.
  '''
  assert y_true.ndim == y_pred.ndim, f'Tensors are of different dimensions: {y_true.ndim} and {y_pred.ndim}'
  assert y_true.shape == y_pred.shape, f'Shape of tensor are different: {y_true.shape} and {y_pred.shape}'
  assert y_true.ndim < 4, f'Tensor dimension must be less than 4: {y_true.ndim}'
  # A 0-d input previously fell through every branch and failed with an
  # UnboundLocalError at the return statement.
  assert y_true.ndim > 0, f'Tensor dimension must be at least 1: {y_true.ndim}'

  # `...` addresses the last axis whatever the number of leading axes, which
  # replaces the three copies of these formulas (one per ndim).
  def _hydrogen(y):
    return 2*y[..., 1] + y[..., 2] + y[..., 5] + 2*y[..., 6] + y[..., 7] + 2*y[..., 8]

  def _oxygen(y):
    return y[..., 3] + 2*y[..., 4] + y[..., 5] + y[..., 6] + 2*y[..., 7] + 2*y[..., 8]

  return _hydrogen(y_true), _hydrogen(y_pred), _oxygen(y_true), _oxygen(y_pred)


In [None]:
###Networks
#single step
class RNN_Model(nn.Module):
    """Single-step network: a stacked ``nn.RNN`` followed by a linear read-out."""

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.input_size, self.hidden_size = input_size, hidden_size
        self.output_size, self.num_layers = output_size, num_layers
        # batch_first: inputs are laid out as (batch, sequence, features).
        self.rnn = nn.RNN(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            dropout = 0.1,
            batch_first = True,
        )
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Only the per-step outputs are used; the final hidden state is discarded.
        rnn_out = self.rnn(x)[0]
        return self.linear(rnn_out)
    
def init_weights(m):
    """Re-initialize an ``nn.Linear`` layer: Kaiming-uniform weights (relu gain)
    and a zero bias. Modules of any other type are left untouched."""
    if type(m) is not nn.Linear:
        return
    nn.init.kaiming_uniform_(m.weight, nonlinearity = 'relu')
    m.bias.data.zero_()

###multiple time steps
#one to many RNN architechture
class Base_RNN(nn.Module):
    """Recurrent cell of the one-to-many model: an ``nn.RNN`` whose output is
    projected back to ``input_size`` so it can be fed in again."""

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, dropout = 0.1, batch_first = True)
        self.linear = nn.Linear(hidden_size, input_size)

    def forward(self, x, in_hidden = None):
        # nn.RNN treats a hidden state of None as "not supplied", so both the
        # first call and the follow-up calls go through the same line.
        rnn_out, out_hidden = self.rnn(x, in_hidden)
        return self.linear(rnn_out), out_hidden

class RNN_Model_Multiple(nn.Module):
    """One-to-many model (v1): a ``Base_RNN`` is unrolled ``ntimesteps`` times and
    every step is mapped to ``output_size`` through a shared ReLU + linear head."""

    def __init__(self,input_size, hidden_size, num_layers, output_size, ntimesteps):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.ntimesteps = ntimesteps
        self.base_rnn = Base_RNN(input_size, hidden_size, num_layers)
        self.relu = nn.ReLU()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        state, hidden = x, None
        step_outputs = []

        for _ in range(self.ntimesteps):
            # advance the recurrent cell by one step
            state, hidden = self.base_rnn(state, hidden)
            # read-out for this step
            step_outputs.append(self.linear(self.relu(state)))
            # skip connection: the original input is added before the next step
            state = state + x

        # steps are joined along dim 1 (the sequence axis of batch_first data)
        return torch.cat(step_outputs, 1)

#one to many RNN architechture
class Base_RNN_v2(nn.Module):
    """Recurrent cell of the v2 one-to-many model: an ``nn.RNN`` that consumes and
    (after a linear projection) produces vectors of ``output_size``."""

    def __init__(self, output_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(output_size, hidden_size, num_layers, dropout = 0.1, batch_first = True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, in_hidden = None):
        # A hidden state of None is what nn.RNN uses when none is supplied.
        rnn_out, out_hidden = self.rnn(x, in_hidden)
        return self.linear(rnn_out), out_hidden

class RNN_Model_Multiple_v2(nn.Module):
    """One-to-many model (v2): the input is first embedded to ``output_size``
    (linear + ReLU) and a ``Base_RNN_v2`` is then unrolled ``ntimesteps`` times
    directly in output space."""

    def __init__(self,input_size, hidden_size, num_layers, output_size, ntimesteps):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.ntimesteps = ntimesteps
        self.base_rnn = Base_RNN_v2(output_size, hidden_size, num_layers)
        self.relu = nn.ReLU()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        # embed the input once; the embedding doubles as the skip term
        embedded = self.relu(self.linear(x))

        state, hidden = embedded, None
        step_outputs = []

        for _ in range(self.ntimesteps):
            recurrent_out, hidden = self.base_rnn(state, hidden)
            # The value emitted for a step is the recurrent output PLUS the skip
            # term. This reproduces the previous implementation, which appended
            # the recurrent output and then added the skip term to that same
            # tensor in place.
            # NOTE(review): confirm the emitted output is meant to include the
            # skip term rather than only the next step's input.
            state = recurrent_out + embedded
            step_outputs.append(state)

        # steps are joined along dim 1 (the sequence axis of batch_first data)
        return torch.cat(step_outputs, 1)

In [None]:
def make_dataloaders(config):
    '''
    Build the train / validation / test loaders over one OxidationDataset_C.

    Fields read from config:
    batch_size: int = batch size of all three loaders
    timesteps: int, list or 'all' = time step selection handed to the dataset
    nsample: int = number of samples to use
    exclude: bool = handed to the dataset as exclude_h2_o2
    split: list = split[0] is the train fraction and split[1] the validation
                  fraction of the data; whatever remains becomes the test set
    '''
    dataset = OxidationDataset_C(inputs_path = 'input_98660.npy',
                                 outputs_dir = './Out_files_npy',
                                 nsample = config.nsample,
                                 ntimesteps = config.timesteps,
                                 exclude_h2_o2 = config.exclude)

    # Sizes of the train and validation parts (rounded down).
    n_total = len(dataset)
    n_train = int(np.floor(config.split[0] * n_total))
    n_val = int(np.floor(config.split[1] * n_total))

    # Fixed-seed shuffle so every call yields the same partition. This seeds
    # numpy's global random state.
    shuffled = list(range(n_total))
    np.random.seed(42)
    np.random.shuffle(shuffled)

    partitions = (
        shuffled[ : n_train],
        shuffled[n_train : n_train + n_val],
        shuffled[n_train + n_val : ],
    )

    # One loader per partition, each drawing its indices in random order.
    train_loader, validation_loader, test_loader = (
        DataLoader(dataset, batch_size = config.batch_size, sampler = SubsetRandomSampler(part))
        for part in partitions
    )

    return train_loader, validation_loader, test_loader

def to_device(model, train_x_mean, train_x_std, train_y_mean, train_y_std):
  '''
  Move the model and the standardization tensors to the GPU when one is
  available.

  Returns (model, train_x_mean, train_x_std, train_y_mean, train_y_std).
  Tensor.cuda() returns a new tensor instead of moving the tensor in place, so
  the caller has to use the returned values; previously the moved tensors were
  bound to local names only and lost when the function returned.
  '''
  if torch.cuda.is_available():
    model = model.cuda()
    train_x_mean = train_x_mean.cuda()
    train_x_std = train_x_std.cuda()
    train_y_mean = train_y_mean.cuda()
    train_y_std = train_y_std.cuda()
  return model, train_x_mean, train_x_std, train_y_mean, train_y_std

In [None]:
def training(model, train_loader, val_loader, config, criterion, mean_std, device, dict_path):
  '''
  Train `model`, logging the per-epoch losses to wandb, and stop early when the
  validation loss has not improved for 200 consecutive epochs.

  model:: Neural network
  train_loader:: loader of the training batches
  val_loader:: loader of the validation batches
  config:: object with the attributes optimizer ('Adam' or 'SGD'), lr,
           beta1/beta2 (Adam) or SGD_momentum (SGD), epochs, mb_coeff
           (material balance weight) and scheduling (bool: apply a StepLR
           schedule with step_size=50 and gamma=0.1)
  criterion:: loss function
  mean_std:: [x_mean, x_std, y_mean, y_std] forwarded to train_step/valid_step
  device:: device the batches are moved to
  dict_path:: path to save best model's state_dict; None disables saving

  Returns (index of the last epoch run, lowest validation loss). The loss is
  the value returned by valid_step, i.e. summed over the validation batches.
  The index is -1 when config.epochs is 0.
  '''
  PATIENCE = 200  # epochs without improvement tolerated before stopping
  min_valid_loss = np.inf

  if config.optimizer == 'Adam':
    optimizer = torch.optim.Adam(model.parameters(), lr = config.lr, betas = (config.beta1, config.beta2))
  elif config.optimizer == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr = config.lr, momentum= config.SGD_momentum)
  else:
    raise ValueError(f'Unknown optimizer ({config.optimizer})')

  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
  no_save = 0 #for early stopping
  # Defined up front so the return statement also works when the loop body
  # never runs (config.epochs == 0 used to raise UnboundLocalError).
  last_epoch = -1

  for e in range(config.epochs):
    last_epoch = e
    train_loss_ = train_step(model, optimizer, criterion, train_loader, mean_std, device, config.mb_coeff)
    valid_loss_ = valid_step(model, criterion, val_loader, mean_std, device, config.mb_coeff)

    # Logged values are averaged over the number of batches.
    wandb.log({"train_loss": train_loss_/len(train_loader),
               "val_loss": valid_loss_/len(val_loader)})

    if config.scheduling:
      scheduler.step()

    if min_valid_loss > valid_loss_:
      min_valid_loss = valid_loss_
      no_save = 0 #reset counter
      # Saving State Dict of the best model so far
      if dict_path is not None:
        torch.save(model.state_dict(), dict_path)
    else:
      # Also reached when valid_loss_ is NaN, since the comparison is then False.
      no_save += 1

    # Early stopping
    if no_save >= PATIENCE:
      break

  return last_epoch, min_valid_loss

# Model Optimization

In [None]:
#Define the sweep
# Top-level description of the sweep; the searched parameters are attached in
# a later cell under the 'parameters' key.
sweep_config = dict(
    name = 'Accelerating Hydrogen Oxidation Calculations',
    description = 'Model Optimization for Adam Optimizer',
    method = 'random',  # how the sweep controller selects hyperparameters
    # 'loss' is the key train_model logs once training has finished
    metric = dict(name = 'loss', goal = 'minimize'),
)
sweep_config

{'name': 'Accelerating Hydrogen Oxidation Calculations (Concentration inputs)',
 'description': 'Model Optimization for Adam Optimizer',
 'method': 'random',
 'metric': {'name': 'loss', 'goal': 'minimize'}}

In [None]:
def _fixed(value):
    """Sweep entry for a parameter that keeps one constant value."""
    return {'value': value}

def _choice(*values):
    """Sweep entry for a parameter sampled from the given values."""
    return {'values': list(values)}

param_dict = {
    'batch_size': _choice(8, 16, 32),
    # handed to the dataset as its time step selection (see make_dataloaders)
    'timesteps': _fixed('all'),
    'nsample': _fixed(1500),
    'hidden_size': _choice(20, 40, 60, 80, 100),
    'num_layers': _choice(2, 3, 4),
    # number of steps the model unrolls (see train_model)
    # NOTE(review): OxidationDataset_C drops the first row of each output file
    # when timesteps is 'all'; confirm 201 equals the number of rows left.
    'ntimesteps': _fixed(201),
    'epochs': _fixed(1000),
    'optimizer': _fixed('Adam'),
    'lr': _choice(5e-2, 1e-2, 5e-3, 1e-3, 5e-4, 1e-4),
    'beta1': _choice(0.85, 0.9, 0.95),
    'beta2': _choice(0.9, 0.95, 0.99),
    'mb_coeff': _fixed(0),
    'scheduling': _choice(True, False),
    'model': _choice('v1', 'v2'),
    'split': _fixed([0.7, 0.2, 0.1]),
    'dict_path': _fixed(None),
    'exclude': _fixed(True),
    'mean_std_path': _fixed('rnn_NoH2_O2_all_mean_std.json'),
}

sweep_config['parameters'] = param_dict

In [None]:
sweep_config

{'name': 'Accelerating Hydrogen Oxidation Calculations (Concentration inputs)',
 'description': 'Model Optimization for Adam Optimizer',
 'method': 'random',
 'metric': {'name': 'loss', 'goal': 'minimize'},
 'parameters': {'batch_size': {'values': [8, 16, 32]},
  'timesteps': {'value': 'all'},
  'nsample': {'value': 1500},
  'hidden_size': {'values': [20, 40, 60, 80, 100]},
  'num_layers': {'values': [2, 3, 4]},
  'ntimesteps': {'value': 200},
  'epochs': {'value': 1000},
  'optimizer': {'value': 'Adam'},
  'lr': {'values': [0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]},
  'beta1': {'values': [0.85, 0.9, 0.95]},
  'beta2': {'values': [0.9, 0.95, 0.99]},
  'mb_coeff': {'value': 0},
  'scheduling': {'values': [True, False]},
  'model': {'values': ['v1', 'v2']},
  'split': {'value': [0.7, 0.2, 0.1]},
  'dict_path': {'value': None},
  'exclude': {'value': False},
  'mean_std_path': {'value': None},
  'input_size': {'value': 10},
  'output_size': {'value': 9}}}

In [None]:
# Initialize Sweep: register sweep_config under the W&B project and keep the
# returned id, which the agent cell below uses to attach to this sweep.
sweep_id = wandb.sweep(sweep_config, project="Hydrogen-Oxidation-Models-Optimization")

Create sweep with ID: 316icv60
Sweep URL: https://wandb.ai/data-bigwig/Hydrogen-Oxidation-Models-Optimization/sweeps/316icv60


In [None]:
def train_model(config = None):
  '''
  Entry point for one sweep run: build the data loaders, the standardization
  statistics and the model described by the wandb config, train it, and log
  the sweep metric.

  Logged at the end of the run:
  loss:: lowest validation loss, averaged over the validation batches so that
         runs with different batch sizes are comparable
  num_epochs:: index of the last epoch run
  '''
  # tell wandb to get started
  with wandb.init(config=config):
    # access all HPs through wandb.config, so logging matches execution!
    config = wandb.config

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #Get data
    train_loader, val_loader, test_loader = make_dataloaders(config)

    # Layer sizes are read off one sample instead of being hard-coded, so they
    # follow config.exclude (which changes the number of columns).
    sample_input, sample_target = train_loader.dataset[0]
    input_size = sample_input.shape[-1]
    output_size = sample_target.shape[-1]

    # Mean and std of the training set. The train loader is passed along so the
    # statistics can be computed when the JSON file does not exist yet.
    train_x_mean, train_x_std, train_y_mean, train_y_std = load_mean_std(config.mean_std_path, train_loader)

    #initialize model
    if config.model == 'v1':
      model = RNN_Model_Multiple(input_size, config.hidden_size, config.num_layers, output_size, config.ntimesteps)
    elif config.model == 'v2':
      model = RNN_Model_Multiple_v2(input_size, config.hidden_size, config.num_layers, output_size, config.ntimesteps)
    else:
      raise ValueError(f'Unknown model ({config.model})')
    model.apply(init_weights)

    #training
    model = model.to(device)
    mean_std = [train_x_mean.to(device), train_x_std.to(device), train_y_mean.to(device), train_y_std.to(device)]
    criterion  = nn.MSELoss()
    # config.dict_path (None in the sweep definition) decides whether the best
    # state_dict is saved.
    epochs, val_loss = training(model, train_loader, val_loader, config, criterion, mean_std, device, config.dict_path)

    # training() returns the loss summed over the validation batches; divide by
    # the number of batches so the metric does not grow with smaller batches.
    wandb.log({'loss': val_loss / len(val_loader)})
    wandb.log({'num_epochs': epochs})

In [None]:
# Launch an agent on the sweep created above: train_model is called once per
# configuration handed out by the sweep controller, limited to count=20 runs.
wandb.agent(sweep_id, train_model, project="Hydrogen-Oxidation-Models-Optimization", count=20)

[34m[1mwandb[0m: Agent Starting Run: 8eqcyi1w with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	beta1: 0.85
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 60
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v2
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: False
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,▁
num_epochs,▁
train_loss,█▆▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
loss,29492397.75
num_epochs,350.0
train_loss,781687.48627
val_loss,785390.67763


[34m[1mwandb[0m: Agent Starting Run: l4bcxps6 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.95
[34m[1mwandb[0m: 	beta2: 0.9
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 100
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v1
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: False
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,▁
num_epochs,▁
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
loss,22760.71259
num_epochs,428.0
train_loss,2211.52917
val_loss,2355.96177


[34m[1mwandb[0m: Agent Starting Run: z5o9axa1 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	beta1: 0.95
[34m[1mwandb[0m: 	beta2: 0.9
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 20
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v2
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: False
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,▁
num_epochs,▁
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
loss,24668839.875
num_epochs,999.0
train_loss,646154.35417
val_loss,650013.99178


[34m[1mwandb[0m: Agent Starting Run: ob7khxg8 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.85
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 100
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v1
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: False
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,▁
num_epochs,▁
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▃▁▃▄▂▂▃▃▁█▅▂▄▃▅▂█▃▃▃▃█▃▁▂▂▅▃█▂▂██▂▄█▄▇▁▃

0,1
loss,22090.59467
num_epochs,205.0
train_loss,2182.41384
val_loss,2285.83563


[34m[1mwandb[0m: Agent Starting Run: mm331lzf with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.85
[34m[1mwandb[0m: 	beta2: 0.9
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 40
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v2
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: False
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,▁
num_epochs,▁
train_loss,█▇▇▆▅▅▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▇▆▅▅▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
loss,20902.68433
num_epochs,873.0
train_loss,2047.53203
val_loss,3374.53291


[34m[1mwandb[0m: Agent Starting Run: og1ckoma with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 60
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v1
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: True
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,▁
num_epochs,▁
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▅▁▂▁▁▂█▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂

0,1
loss,82806.34027
num_epochs,203.0
train_loss,2149.67609
val_loss,2297.63337


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xvb89iob with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.95
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 100
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v1
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: True
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,▁
num_epochs,▁
train_loss,█▇▅▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▅▃▃▂▂▂▂▁▁▂▂▁▂▂▂▂▁▁▂▂▂▂▁▁▂▂▁▁▁▁▂▁▁▂▁▂▂▂

0,1
loss,7463926.0
num_epochs,562.0
train_loss,747035.0625
val_loss,752544.70625


[34m[1mwandb[0m: Agent Starting Run: 84jyieax with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.95
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 60
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v2
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: True
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,▁
num_epochs,▁
train_loss,█▄▁▁
val_loss,▅▇█▁

0,1
loss,7864396.125
num_epochs,208.0
train_loss,
val_loss,


[34m[1mwandb[0m: Agent Starting Run: f4h86tl2 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	beta1: 0.85
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 80
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v2
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: False
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,▁
num_epochs,▁
train_loss,█▇▃▂▂▇▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▇▄█▃▃▆▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
loss,85114.53989
num_epochs,311.0
train_loss,2152.67122
val_loss,2297.8395


[34m[1mwandb[0m: Agent Starting Run: ouqm8e0d with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.9
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 40
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v1
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: True
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
loss,▁
num_epochs,▁
train_loss,█▆▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
loss,14182976.3125
num_epochs,671.0
train_loss,744061.60606
val_loss,746737.72039


[34m[1mwandb[0m: Agent Starting Run: wc8ee9rq with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.85
[34m[1mwandb[0m: 	beta2: 0.99
[34m[1mwandb[0m: 	dict_path: None
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	exclude: False
[34m[1mwandb[0m: 	hidden_size: 80
[34m[1mwandb[0m: 	input_size: 10
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	mb_coeff: 0
[34m[1mwandb[0m: 	mean_std_path: None
[34m[1mwandb[0m: 	model: v2
[34m[1mwandb[0m: 	nsample: 1500
[34m[1mwandb[0m: 	ntimesteps: 200
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	output_size: 9
[34m[1mwandb[0m: 	scheduling: True
[34m[1mwandb[0m: 	split: [0.7, 0.2, 0.1]
[34m[1mwandb[0m: 	timesteps: all
