In [10]:
# model.py
"""
Run main.py to start.

This script is modified from PyTorch quickstart:
https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
"""

import nni
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# Hyperparameters: built-in defaults, overridden by any key the NNI tuner supplies
optimized_params = nni.get_next_parameter()
params = {'features': 512, 'lr': 0.001, 'momentum': 0, **optimized_params}

# FashionMNIST train / test splits (downloaded into ./data when missing)
training_data, test_data = (
    datasets.FashionMNIST(root='data', train=is_train, download=True, transform=ToTensor())
    for is_train in (True, False)
)
# Both loaders yield fixed-size batches of 64 samples
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=64) for split in (training_data, test_data)
)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Three-layer MLP; both hidden layers share the tunable width params['features']
hidden = params['features']
model = nn.Sequential(*[
    nn.Flatten(),
    nn.Linear(28 * 28, hidden), nn.ReLU(),
    nn.Linear(hidden, hidden), nn.ReLU(),
    nn.Linear(hidden, 10),
])
model = model.to(device)

# Loss and optimizer; learning rate and momentum come from the (possibly tuned) params
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=params['lr'],
    momentum=params['momentum'],
)

def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch yielded by ``dataloader``.

    Each batch is moved to the module-level ``device``, passed through
    ``model``, scored with ``loss_fn``, and then used for a single
    ``optimizer`` step. Nothing is returned.
    """
    model.train()
    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        batch_loss = loss_fn(model(inputs), targets)
        # Clear gradients left over from the previous step before backprop
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

def test(dataloader, model, loss_fn):
    """Return the fraction of samples that ``model`` classifies correctly.

    Batches from ``dataloader`` are moved to the module-level ``device`` and
    evaluated with gradients disabled. The number of correct argmax
    predictions is divided by ``len(dataloader.dataset)``.

    ``loss_fn`` is accepted but not used by this function.
    """
    model.eval()
    hits = 0
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            predicted = model(inputs).argmax(1)
            hits += (predicted == targets).float().sum().item()
    total = len(dataloader.dataset)
    return hits / total

# Train for a fixed number of epochs, reporting test accuracy to NNI after each one
epochs = 5
for epoch in range(epochs):
    train(
        dataloader=train_dataloader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
    )
    accuracy = test(dataloader=test_dataloader, model=model, loss_fn=loss_fn)
    nni.report_intermediate_result(accuracy)
# The accuracy measured after the last epoch is the trial's final metric
nni.report_final_result(accuracy)


[2022-12-10 11:11:47] [32mIntermediate result: 0.4981  (Index 0)[0m
[2022-12-10 11:12:17] [32mIntermediate result: 0.5755  (Index 1)[0m
[2022-12-10 11:12:46] [32mIntermediate result: 0.6161  (Index 2)[0m
[2022-12-10 11:13:17] [32mIntermediate result: 0.6354  (Index 3)[0m
[2022-12-10 11:13:44] [32mIntermediate result: 0.6479  (Index 4)[0m
[2022-12-10 11:13:44] [32mFinal result: 0.6479[0m


In [9]:
# main.py
from pathlib import Path
import signal

from nni.experiment import Experiment

# Hyperparameter search space, written in NNI's `_type` / `_value` format
search_space = dict(
    features=dict(_type='choice', _value=[128, 256, 512, 1024]),
    lr=dict(_type='loguniform', _value=[0.0001, 0.1]),
    momentum=dict(_type='uniform', _value=[0, 1]),
)

# Local-mode experiment: each trial is launched with `python model.py`
experiment = Experiment('local')
experiment.config.search_space = search_space
experiment.config.trial_command = 'python model.py'
# NOTE(review): the assignment below is disabled, so trial_code_directory is never
# set here. `__file__` is not defined inside a notebook cell, which is presumably
# why it was commented out -- confirm.
# experiment.config.trial_code_directory = Path(__file__).parent

# Random search: at most 10 trials in total, 2 running at the same time
experiment.config.tuner.name = 'Random'
experiment.config.max_trial_number = 10
experiment.config.trial_concurrency = 2

# Launch on port 8081 without waiting for the trials to complete
experiment.run(port=8081, wait_completion=False)

print('Experiment is running. Press Ctrl-C to quit.')
# NOTE(review): the blocking call below is disabled; in this notebook the experiment
# is stopped from a later cell with `nnictl stop`.
# signal.pause()

[2022-12-10 11:10:42] [32mCreating experiment, Experiment ID: [36mdrg6vnfj[0m
[2022-12-10 11:10:42] [32mStarting web server...[0m
[2022-12-10 11:10:44] [32mSetting up...[0m
[2022-12-10 11:10:44] [32mWeb portal URLs: [36mhttp://169.254.107.54:8081 http://169.254.146.219:8081 http://169.254.103.200:8081 http://192.168.1.12:8081 http://169.254.6.133:8081 http://127.0.0.1:8081[0m
Experiment is running. Press Ctrl-C to quit.


In [12]:
! nnictl stop drg6vnfj
# stop + the Experiment_ID

INFO:  Stopping experiment drg6vnfj
INFO:  Stop experiment success.
