In [3]:
import os

import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from utils import tabularModel, titanicDataset

from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.suggest.bayesopt import BayesOptSearch

import time
%matplotlib inline

ImportError: cannot import name 'SearchGenerator'

In [None]:
# Load the Titanic training split for inspection. The variable is named
# `train_data`, so read train.csv (the original read test.csv here).
train_data = pd.read_csv(os.path.join('data', 'titanic', 'train.csv'))

In [None]:
# Preview the first ten rows of the loaded frame (rendered as the cell output).
train_data.head(n=10)

In [None]:
def train(config):
    """Train a tabular model on the Titanic data and return the best validation accuracy.

    The dataset at ``data/titanic/train.csv`` is split 80/20 at random into a
    training and a validation part. The model is trained for up to 300 epochs
    and stopped early once the end-of-epoch validation accuracy has failed to
    match or beat the best value for 50 consecutive epochs.

    Args:
        config: Mapping holding the hyper-parameters of this run:
            ``batch_size`` and ``num_embedding`` (both cast to ``int``, so
            float samples are accepted) and ``learning_rate`` (passed to the
            optimizer unchanged).

    Returns:
        The highest end-of-epoch validation accuracy seen, as a fraction of
        correctly classified validation samples.
    """
    batch_size = int(config['batch_size'])
    num_embedding = int(config['num_embedding'])
    learning_rate = config['learning_rate']

    dataset = titanicDataset(os.path.join('data', 'titanic', 'train.csv'))
    num_train = int(0.8 * len(dataset))
    train_dataset, valid_dataset = torch.utils.data.random_split(
        dataset, [num_train, len(dataset) - num_train])
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size, shuffle=True, num_workers=4)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=len(valid_dataset), shuffle=False, num_workers=4)

    categorical_columns = ['Pclass', 'Sex', 'Embarked', 'binned_family']
    categories = [3, 2, 3, 4]
    continuous_columns = ['Fare', 'Age']
    # One (cardinality, embedding width) pair per categorical column.
    embedding_size = [(num_categories, num_embedding) for num_categories in categories]
    model = tabularModel(embedding_size, categorical_columns, continuous_columns).double()
    # NOTE(review): weight_decay=0.95 is a very strong penalty -- confirm it is intended.
    optim = torch.optim.Adagrad(model.parameters(), lr=learning_rate, weight_decay=0.95)
    lossfunc = nn.CrossEntropyLoss()
    num_epoch = 300
    patience = 50  # epochs without improvement tolerated before stopping

    # The validation loader yields the whole split as one unshuffled batch, so
    # fetch it once instead of spinning up the worker processes on every
    # evaluation. Assumes the dataset returns the same item for the same index.
    valid_x, valid_y = next(iter(valid_dataloader))
    # reshape(-1) rather than squeeze(): squeeze() collapses a batch of one
    # sample to a 0-dim tensor, which CrossEntropyLoss rejects.
    # Assumes one class label per sample -- TODO confirm against titanicDataset.
    valid_target = valid_y.reshape(-1).long()

    # Index of the final batch of an epoch; derived from the loader so it
    # follows the configured batch size.
    last_batch = len(train_dataloader) - 1

    best_accu = 0
    cnt = 0
    accu = 0.0  # defined up front so an empty training loader cannot leave it unbound
    model.train()
    for epoch in range(num_epoch):
        for i, (x, y) in enumerate(train_dataloader):
            optim.zero_grad()
            pred = model(x)
            loss = lossfunc(pred, y.reshape(-1).long())
            loss.backward()
            optim.step()

            # Evaluate every 10th batch and always on the last batch, so the
            # early-stopping check below sees the end-of-epoch accuracy.
            if i % 10 == 0 or i == last_batch:
                model.eval()
                with torch.no_grad():
                    valid_pred = model(valid_x)
                    valid_loss = lossfunc(valid_pred, valid_target)
                    accu = (valid_pred.argmax(-1) == valid_target).sum().item() / len(valid_dataset)
                print("Epoch: {} | Train loss: {:.4f} | Valid loss: {:.4f} | Valid Accu: {:.4f}".format(
                    epoch, loss.item(), valid_loss.item(), accu
                ))
                model.train()
        print("\n {:.4f} {:.4f} {} \n".format(1.02 * best_accu, accu, cnt))
        if accu >= best_accu:
            cnt = 0
            best_accu = accu
            # torch.save(model.state_dict(), os.path.join('model', 'torch_nn',
            #                                             'model-{}.pt'.format(epoch)))
        else:
            cnt += 1

        if cnt >= patience:
            print("Early stopped!")
            break
    return best_accu

In [None]:
def raytune_trainable(config, checkpoint_dir=None):
    """Run one training trial and report its score to Ray Tune.

    Args:
        config: Hyper-parameter mapping, forwarded unchanged to ``train``.
        checkpoint_dir: Accepted but not used by this function.
    """
    best_accuracy = train(config)
    tune.report(accu=best_accuracy)



In [None]:
# Hyper-parameters to tune, each given as a pair of numbers
# (presumably the lower/upper bounds expected by BayesOptSearch -- confirm).
search_space = dict(
    batch_size=(2, 64),
    num_embedding=(4, 256),
    learning_rate=(1e-4, 0.1),
)

In [None]:
# Search `search_space` with Bayesian optimisation, maximising the `accu`
# metric reported by `raytune_trainable`.
algo = BayesOptSearch(space=search_space, metric='accu', mode='max', random_search_steps=10)
start = time.time()
# checkpoint_freq / checkpoint_at_end are intentionally not passed: the
# trainable is a function that declares `checkpoint_dir`, and Ray Tune rejects
# those arguments for such functions. It never writes a checkpoint anyway.
# NOTE(review): the trainable reports a single result per trial, so ASHA has no
# intermediate results to stop trials on -- confirm whether that is intended.
analysis = tune.run(raytune_trainable, search_alg=algo, num_samples=300,
                    local_dir=os.curdir,
                    # resources_per_trial={'cpu': 9},
                    scheduler=ASHAScheduler(
                        metric='accu',
                        mode='max')
                    )
end = time.time()
# Wall-clock duration of the whole tuning run, in seconds.
print(end-start)