<a href="https://colab.research.google.com/github/HamidrezaKmK/ML-Mnemonist/blob/main/ExperimentRunnerTutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup hosted runtime

In [1]:
%%writefile sys_setup.py
#!usr/bin/bash python

import sys
from google.colab import drive
import subprocess
import os
import shutil

PROJ_NAME = 'ML-Mnemonist'
GIT_DIR = 'HamidrezaKmK'

data_dir = None

if __name__ == '__main__':
  env = os.environ.copy()
  if not os.path.exists('/content/drive'):
    print("Mounting drive...")
    drive.mount('/content/drive')
    print("Mount complete!")

  while True:
    opt = input("What are you trying to do? [clone/pull] ")
    if opt == 'clone':
      addr = f"https://github.com/{GIT_DIR}/{PROJ_NAME}"
      print(f"Trying to connect to {addr}")
      token = input("Enter token: ")
      addr = addr.replace('[TOKEN]', token)
      res = subprocess.run(['git', 'clone', addr], env=env, capture_output=True)
      print(res.stdout.decode())
      print(res.stderr.decode())
      break
    elif opt == 'pull':
      path = os.path.join('/content', PROJ_NAME)
      os.chdir(path)
      res = subprocess.run(['git', 'pull'], env=env, capture_output=True)
      print(res.stdout.decode())
      print(res.stderr.decode())
      break
    elif opt == '':
      print("Nothing happened!")
      break
  
  if not os.path.exists(f'/content/{PROJ_NAME}'):
    raise RuntimeError("No project repository available!")

  if not os.path.exists(f'/content/{PROJ_NAME}/.env'):
    print("Dotenv non-existant!")
    while True:
      resp = input("Do you want to enter the file in the prompt or copy it?\n[copy/write] ")
      if resp == 'copy':
        dir = input("Enter the directory to copy: ")
        shutil.copyfile(dir, f'/content/{PROJ_NAME}/.env')
      elif resp == 'write':
        print("Enter the lines in format ENV_VARIABLE_NAME=VALUE")
        print("End with: ENDFILE")
        with open(f'/content/{PROJ_NAME}/.env', 'w') as f:
          while True:
            line = input()
            if line == 'ENDFILE':
              break
            f.write(f'{line}\n')
      else:
        continue
      break
        
  os.chdir('/content')


Writing sys_setup.py


In [23]:
PROJ_NAME = 'ML-Mnemonist'

from IPython.display import clear_output
%run /content/sys_setup.py
resp = input("Do you want to install packages? [y/n] ")
if resp == 'y':
  !pip install -r /content/ML-Mnemonist/requirements.txt
  input("Requrements installed! -- press any key to continue ...")
clear_output()

import sys
sys.path.append(f'/content/{PROJ_NAME}')
os.chdir(f'/content/{PROJ_NAME}')
print("Running complete!")

Running complete!


<Figure size 432x288 with 0 Axes>

# Local Runtime

In [3]:
import sys
sys.path.append('.')

# Expand configurations

The configuration available at `conf-test-branches.yaml`
contains multiple scenarios for the configurations to occur.
We may expand all the possible configurations using the function below
to create all the possible outcomes.

In [1]:
%load_ext autoreload

In [2]:
import os
from mlmnemonist.validation_tools import expand_cfg
from testing.config.config import get_cfg_defaults
expand_cfg(get_cfg_defaults(),
           cfg_dir='conf-test-branches.yaml',
           save_directory='config/all-branches')
%autoreload 3

Now let's create the same model we created before



In [4]:
from mlmnemonist.experiment_runner import ExperimentRunner
from mlmnemonist.experiment_factory import ExperimentRunnerFactory
import pandas as pd

from testing.config.config import get_cfg_defaults
import os

factory = ExperimentRunnerFactory(cfg_base=get_cfg_defaults())


%autoreload 3

In [None]:
runner = factory.create(experiment_name='my-cv-fold',
                        cfg_dir='conf-test.yaml',
                        verbose=4)

In [6]:
runner = factory.retrieve('0')

In [7]:
print(runner)

Runner at /home/hamidreza/myprojects/ML-Mnemonist/testing/mnemonic-experiments/2022-07-01-my-cv-fold-2
	 - cache token: 0
	 - configurations at: /home/hamidreza/myprojects/ML-Mnemonist/testing/config/conf-test.yaml
	 - preprocessings functions []
	 - recurring pipeline []
	 - Run function not implemented!


In [8]:
runner.preprocessing_pipeline.clear_functions()

In [9]:
def load_raw_data(runner: ExperimentRunner):
  train_name = runner.cfg.DATASET.TRAIN_NAME
  test_name = runner.cfg.DATASET.TEST_NAME
  runner.train_df = pd.read_csv(runner.reveal_true_path(train_name))
  runner.test_df = pd.read_csv(runner.reveal_true_path(test_name))
runner.preprocessing_pipeline.update_function(load_raw_data)

In [10]:
def process_data(runner: ExperimentRunner):
  runner.train_X = runner.train_df.drop(columns='median_house_value').to_numpy()
  runner.train_Y = runner.train_df['median_house_value'].to_numpy()
  runner.test_X = runner.test_df.drop(columns='median_house_value').to_numpy()
  runner.test_Y = runner.test_df['median_house_value'].to_numpy()
runner.preprocessing_pipeline.update_function(process_data)

In [11]:
print(runner)

Runner at /home/hamidreza/myprojects/ML-Mnemonist/testing/mnemonic-experiments/2022-07-01-my-cv-fold-2
	 - cache token: 0
	 - configurations at: /home/hamidreza/myprojects/ML-Mnemonist/testing/config/conf-test.yaml
	 - preprocessings functions ['load_raw_data', 'process_data']
	 - recurring pipeline []
	 - Run function not implemented!


In [12]:
# Run the functions in line
runner.preprocess()


[1/2] Running load_raw_data
[2/2] Running process_data


In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MyMLP(nn.Module):
    def __init__(self, input_features: int, hidden_layer_1: int, hidden_layer_2: int):
        super(MyMLP, self).__init__()
        self.l1 = nn.Linear(input_features, hidden_layer_1)
        self.l2 = nn.Linear(hidden_layer_1, hidden_layer_2)
        self.l3 = nn.Linear(hidden_layer_2, 1)

    def forward(self, x):
        x = self.l1(x)
        x = F.relu(x)
        x = self.l2(x)
        x = F.relu(x)
        x = self.l3(x)
        return x


In [14]:
import torch
def setup_device(runner: ExperimentRunner):
  # extract the device from config
  device = runner.cfg.SOLVER.DEVICE
  if device == 'cpu':
    runner.device = torch.device('cpu')
  elif device == 'cuda' and torch.cuda.is_available():
    runner.device = torch.device('cuda')
  else:
    raise NotImplementedError(f"device {device} is not implemented!")
runner.recurring_pipeline.update_function(setup_device)

In [15]:

def setup_model(runner: ExperimentRunner):
  # extract the method from config
  method = runner.cfg.SOLVER.METHOD
  # construct the model from registry
  cfg_h_params = runner.cfg.MODEL.HYPER_PARAMETERS
  my_model = MyMLP(input_features=cfg_h_params.IN_FEATURES,
                   hidden_layer_1=cfg_h_params.H1,
                   hidden_layer_2=cfg_h_params.H2)
  # set the function in the cache to save weights
  my_model = runner.CACHE.SET_M('mlp-key', my_model)
  my_model.to(runner.device)
  if runner.verbose > 0:
    print("Model state dict")
    print(my_model)
runner.recurring_pipeline.update_function(setup_model)

In [16]:
import torch
import torch.nn as nn

def setup_training(runner: ExperimentRunner):
  my_model = runner.CACHE.GET_M('mlp-key').to(runner.device)
  runner.criterion = nn.MSELoss()
  if runner.cfg.SOLVER.OPTIMIZER_TYPE == 'adam':
    runner.optim = torch.optim.Adam(my_model.parameters(), lr=runner.cfg.SOLVER.LR)
  else:
    raise NotImplementedError(f"Optimizer type not implemented"
                              f"{runner.cfg.SOLVER.OPTIMIZER_TYPE}")
runner.recurring_pipeline.update_function(setup_training)

In [17]:
print(runner)

Runner at /home/hamidreza/myprojects/ML-Mnemonist/testing/mnemonic-experiments/2022-07-01-my-cv-fold-2
	 - cache token: 0
	 - configurations at: /home/hamidreza/myprojects/ML-Mnemonist/testing/config/conf-test.yaml
	 - preprocessings functions ['load_raw_data', 'process_data']
	 - recurring pipeline ['setup_device', 'setup_model', 'setup_training']
	 - Run function not implemented!


# Implement a run function

A run function is the core part of an experiment. This function takes in an input of type `ExperimentRunner` type is its first element and a bunch of arbitrary input types for the rest.


In [15]:
runner.CACHE.RESET()

In [18]:
from tqdm import tqdm
from IPython.display import clear_output
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.tensorboard import SummaryWriter

def helper(model, X, y, history, device, loss_fn, optimizer=None):
  for st in range(0, X.shape[0], 500):
    en = min(st + 500, X.shape[0])
    X_ = torch.from_numpy(X[st:en, :]).float().to(device)
    pred = model(X_)
    y_ = torch.from_numpy(y[st:en]).float().to(device)
    loss = loss_fn(pred.squeeze(), y_)
    if optimizer is not None:
      loss.backward()
      optimizer.step()
    history.append(loss.detach().cpu().item())
    
def my_custom_run(runner: ExperimentRunner, show_freq=5):
  # print(runner.CACHE._cached_primitives)
  # return
  # Get model from cache
  my_model = runner.CACHE.GET_M('mlp-key')

  # Get the epoch number and history from cache
  # if it is not cached from before set it to zero
  epoch_i = runner.CACHE.SET_IFN('epoch_i', 0)
  loss_history = runner.CACHE.SET_IFN('loss-history', [[], []])
  writer = SummaryWriter(log_dir=runner.CACHE.LOGS_DIR)
  inds = np.arange(runner.train_X.shape[0])
  for epoch_i in range(epoch_i, 100):

    np.random.shuffle(inds)

    loss_train = []
    my_model.train()
    helper(my_model, runner.train_X[inds],
           runner.train_Y[inds], loss_train, runner.device,
           runner.criterion, runner.optim)
    mean_loss = sum(loss_train) / len(loss_train)
    loss_history[0].append(mean_loss)
    train_loss = mean_loss
    loss_test = []
    my_model.eval()
    with torch.no_grad():
      helper(my_model, runner.test_X,
            runner.test_Y, loss_test, runner.device,
            runner.criterion)
    mean_loss = sum(loss_test) / len(loss_test)
    loss_history[1].append(mean_loss)
    test_loss = mean_loss
    writer.add_scalars('losses', {
      'train': train_loss,
      'test': test_loss
    }, epoch_i)
    if (epoch_i + 1) % show_freq == 0:
      if runner.verbose > 0:
        clear_output()
        plt.plot(list(range(len(loss_history[0]))), loss_history[0], label='loss-train')
        plt.plot(list(range(len(loss_history[1]))), loss_history[1], label='loss-test')
        plt.legend()
        plt.show()

    # Caching and saving checkpoints
    runner.CACHE.SET('epoch_i', epoch_i)
    runner.CACHE.SET('loss-history', loss_history)
    runner.CACHE.SET_M('mlp-key', my_model)
    runner.CACHE.SAVE()

  return loss_history[0][-1]

runner.implement_run(my_custom_run)

Run the following gridsearch algorithm.


In [19]:
from mlmnemonist.validation_tools import grid_search
grid_search(runner,
            cache_token='mine',
            verbose=1,
            cfg_base=get_cfg_defaults(),
            all_cfg_dir='config/all-branches')

%autoreload 3


Iteration no. [3/12] -- Running 1-0-1-MLM-conf-test-branches.yaml : 27561897743.058823
Iteration no. [4/12] -- Running 2-1-0-MLM-conf-test-branches.yaml : 55214974494.117645
Iteration no. [5/12] -- Running 0-0-1-MLM-conf-test-branches.yaml : 16666299060.705883
Iteration no. [6/12] -- Running 0-0-0-MLM-conf-test-branches.yaml : 21333825897.411766
Iteration no. [7/12] -- Running 0-1-0-MLM-conf-test-branches.yaml : 18080709089.882355
Iteration no. [8/12] -- Running 1-1-1-MLM-conf-test-branches.yaml : 26806452886.588234
Iteration no. [9/12] -- Running 1-1-0-MLM-conf-test-branches.yaml : 29983394876.235294
Iteration no. [10/12] -- Running 2-0-1-MLM-conf-test-branches.yaml : 55504138601.411766
Iteration no. [11/12] -- Running 2-1-1-MLM-conf-test-branches.yaml : 53756621884.23529
