# CS7643 - Final Project

In [2]:
# built-in
from collections import Counter
# public
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
# private
from config import Config, InstrConfig, ILConfig
from src.utils import dataloader, helpers
from src.models.instructions_generator_model import InstructionsGeneratorModel
from src.trainer import instructions_generator_trainer
from src.models.imitation_learning_model import ImitationLearningModel
from src.trainer import imitation_learning_trainer
from src.gamer import game_env

%load_ext autoreload 
%autoreload 2
%config Completer.use_jedi = False

# Initialization

In [3]:
# Build the shared project configuration (data path, device, seed, GloVe cache are read from it below).
config = Config()
# Seed through the project helper so the shuffles/splits further down are repeatable.
# NOTE(review): presumably seeds numpy and torch — confirm in src.utils.helpers.
helpers.set_seed(config.random_seed)

# Data

## Load Data

In [4]:
# train size 202831
# unique number of instructions 35921
# Load the preprocessed dataset from config.DATA_PATH via the project loader.
# NOTE(review): the helper name suggests pickle files — only point this at trusted data.
data = dataloader.load_pkl(workdir=config.DATA_PATH)

In [5]:
# Unpack the six components returned by load_pkl (positional order matters).
train_states, train_inventories, train_actions, train_goals, train_instructions, all_instructions = data

In [6]:
# Count the samples whose instruction is falsy (None or empty).
invalid_index = {
    idx
    for idx, instruction in enumerate(train_instructions)
    if not instruction
}
print(len(invalid_index))

34


In [7]:
# Keep only the samples that carry a non-empty instruction.
# The sample count is captured BEFORE any list is overwritten so the goal
# alignment check below compares against the unfiltered length.
n_samples_before = len(train_instructions)
valid_index = [i for i, instruction in enumerate(train_instructions) if instruction]
print(len(valid_index))

# Plain list indexing instead of np.array(...)[idx].tolist(): the numpy round-trip
# can coerce element types and fails on ragged (variable-length) sequences in
# recent NumPy releases, while indexing leaves every sample object untouched.
train_states = [train_states[i] for i in valid_index]
train_inventories = [train_inventories[i] for i in valid_index]
train_actions = [train_actions[i] for i in valid_index]
# Goals are passed to CraftingDataset next to the filtered lists, so they must
# stay index-aligned. Only filter when they are per-sample (same length as the
# unfiltered data) — NOTE(review): confirm train_goals is per-sample.
if len(train_goals) == n_samples_before:
    train_goals = [train_goals[i] for i in valid_index]
# Filtered last because valid_index was derived from the unfiltered instructions.
train_instructions = [train_instructions[i] for i in valid_index]

202797


In [8]:
# action size: frequency of every action label, then the number of distinct actions
c = Counter(train_actions)
print(c)
len(c)

Counter({'up': 37842, 'left': 37539, 'down': 37211, 'right': 35955, 'grab': 14583, 'craft': 13064, 'mine': 11701, 'toggle_switch': 7504, 'stop': 7398})


9

## Build Vocab

In [9]:
# Build the vocabulary and its weights from all instructions;
# vocab_weights is handed to the instruction generator model further down.
vocab, vocab_weights = dataloader.generate_vocab(
    all_instructions,
    config.device,
    workdir=config.DATA_PATH,
    cache=config.glove_cache,
)

Total vocabulary size: 212


---

# Instructions Generator

In [None]:
# Settings for the instruction generator stage; the cells below read
# embedding_dim, glove_cache, validation_split, batch_size, num_workers,
# device, learning_rate, epochs, valid_patience and SAVE_PATH from it.
instr_config = InstrConfig()

## Generate Dataset

In [9]:
# Wrap the filtered training lists in the project dataset for the instruction generator.
dataset = dataloader.CraftingDataset(
    instr_config.embedding_dim,
    train_states,
    train_inventories,
    train_actions,
    train_goals,
    train_instructions,
    vocab,
    cache=instr_config.glove_cache,
)

embedding loaded
one hot loaded
actions loaded
goals loaded
done loading dataset


In [10]:
# Smallest length of the last field over all dataset items
# (presumably the instruction sequence — confirm against CraftingDataset).
min(len(sample[-1]) for sample in dataset)

3

In [11]:
# Record the dataset length on the config; the split step reads it back.
instr_config.dataset_size = len(dataset)

## Split Dataset

In [12]:
# Shuffle all sample indices once, then carve off the first `split` of them
# for validation; the remainder is used for training.
indices = list(range(instr_config.dataset_size))
split = int(np.floor(instr_config.validation_split * instr_config.dataset_size))
np.random.shuffle(indices)
val_indices = indices[:split]
train_indices = indices[split:]

# Each sampler draws (in random order) only from its own index subset.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

## Initialize Data Loader

In [13]:
# Options shared by both loaders; only the sampler differs between them.
instr_loader_kwargs = dict(
    batch_size=instr_config.batch_size,
    num_workers=instr_config.num_workers,
    pin_memory=True,
    collate_fn=dataloader.collate_fn,
)

# Both loaders read the same dataset object; the samplers restrict each one
# to its own subset of indices.
train_data_loader = DataLoader(dataset, sampler=train_sampler, **instr_loader_kwargs)
validation_data_loader = DataLoader(dataset, sampler=valid_sampler, **instr_loader_kwargs)

## Setup Training

In [14]:
# Instruction generator, moved onto the configured device.
model = InstructionsGeneratorModel(
    instr_config.device,
    len(vocab),
    instr_config.embedding_dim,
    vocab_weights,
).to(instr_config.device)
# Short aliases for the stage-specific training / validation routines.
train = instructions_generator_trainer.train
validate = instructions_generator_trainer.validate
# CE Loss
criterion = torch.nn.CrossEntropyLoss()
# Adam
# Materialise the trainable parameters as a list. A bare filter() object is a
# one-shot iterator: the optimizer constructor would consume it, leaving the
# `parameters` that is later passed to train() empty.
parameters = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(parameters, lr=instr_config.learning_rate)
# Log
writer = SummaryWriter() if instr_config.summary_writer else None

## GO

In [15]:
# Best-so-far validation metrics. They start at +inf so that any finite first
# validation loss is accepted as the initial best.
best_valid_loss = float('inf')
best_valid_bleu = float('inf')
best_valid_tk_acc = float('inf')
# valid_epoch counts consecutive epochs without improvement.
valid_epoch = 0
best_epoch = None

for epoch in range(instr_config.epochs):
    # train
    loss, bleu, tk_acc = train(
        instr_config.device,
        epoch,
        train_data_loader,
        model,
        optimizer,
        criterion,
        parameters,
        vocab,
        summary_writer=writer,
    )
    print(
        'Overall Epoch: %d, train loss: %.3f, train bleu: %.3f, train token acc: %.3f'
        % (epoch, loss, bleu, tk_acc)
    )

    # valid
    loss, bleu, tk_acc = validate(
        instr_config.device,
        epoch,
        validation_data_loader,
        model,
        criterion,
        vocab,
        summary_writer=writer,
    )
    print(
        'Overall Epoch: %d, valid loss: %.3f, valid bleu: %.3f, valid token acc: %.3f'
        % (epoch, loss, bleu, tk_acc)
    )

    # early stopping: no improvement -> bump the patience counter and maybe stop
    improved = loss <= best_valid_loss
    if not improved:
        valid_epoch += 1
        if valid_epoch >= instr_config.valid_patience:
            break
        continue

    # improvement: remember the metrics, reset patience and checkpoint the weights
    best_valid_loss = loss
    best_valid_bleu = bleu
    best_valid_tk_acc = tk_acc
    valid_epoch = 0
    best_epoch = epoch
    torch.save(model.state_dict(), instr_config.SAVE_PATH)
    print(
        'Best Epoch: %d, valid loss: %.3f, valid bleu: %.3f, valid token acc: %.3f'
        % (best_epoch, best_valid_loss, best_valid_bleu, best_valid_tk_acc)
    )
    print('Trained model saved at ', instr_config.SAVE_PATH)

Epoch: 0, train loss: 4.204, train bleu: 0.497, train token acc: 0.615:   3%| | 


KeyboardInterrupt: 

---

# Reinforcement Learning (RL) - Pre-training

## Imitation Learning (IL)

### Initialization

In [10]:
# Settings for the imitation learning stage; the cells below read
# embedding_dim, validation_split, batch_size, num_workers, device,
# learning_rate, epochs, valid_patience and SAVE_PATH from it.
il_config = ILConfig()

### Generate Dataset

In [11]:
# Rebuild the dataset with the imitation-learning embedding size.
# No cache argument is passed here, unlike the instruction generator dataset above.
dataset = dataloader.CraftingDataset(
    il_config.embedding_dim,
    train_states,
    train_inventories,
    train_actions,
    train_goals,
    train_instructions,
    vocab,
)

embedding loaded
one hot loaded
actions loaded
goals loaded
done loading dataset


In [12]:
# Record the dataset length on the config; the split step reads it back.
il_config.dataset_size = len(dataset)

### Split Dataset

In [13]:
# Shuffle all sample indices once, then carve off the first `split` of them
# for validation; the remainder is used for training.
indices = list(range(il_config.dataset_size))
split = int(np.floor(il_config.validation_split * il_config.dataset_size))
np.random.shuffle(indices)
val_indices = indices[:split]
train_indices = indices[split:]

# Each sampler draws (in random order) only from its own index subset.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

### Initialize Data Loader

In [14]:
# Options shared by both loaders; only the sampler differs between them.
il_loader_kwargs = dict(
    batch_size=il_config.batch_size,
    num_workers=il_config.num_workers,
    pin_memory=True,
    collate_fn=dataloader.collate_fn,
)

# Both loaders read the same dataset object; the samplers restrict each one
# to its own subset of indices.
train_data_loader = DataLoader(dataset, sampler=train_sampler, **il_loader_kwargs)
validation_data_loader = DataLoader(dataset, sampler=valid_sampler, **il_loader_kwargs)

### Setup Training

In [15]:
# Imitation learning model, built from its config and moved to the configured device.
model = ImitationLearningModel(il_config)
model = model.to(il_config.device)
# Short aliases for the stage-specific training / validation routines.
train = imitation_learning_trainer.train
validate = imitation_learning_trainer.validate
validate_game = imitation_learning_trainer.validate_game
# CE Loss
criterion = torch.nn.CrossEntropyLoss()
# Adam
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=il_config.learning_rate,
)
# Log
writer = SummaryWriter() if il_config.summary_writer else None

### GO

In [30]:
# Best-so-far validation metrics. They start at +inf so that any finite first
# validation loss is accepted as the initial best.
best_valid_loss = float('inf')
best_valid_acc = float('inf')
best_valid_reward = float('inf')
# valid_epoch counts consecutive epochs without improvement.
valid_epoch = 0
best_epoch = None

for epoch in range(il_config.epochs):
    # train
    loss, accuracy = train(
        epoch,
        train_data_loader,
        model,
        optimizer,
        criterion,
        il_config,
        summary_writer=writer,
    )
    print('Overall Epoch: %d, train loss: %.3f, train accuracy: %.3f' % (epoch, loss, accuracy))

    # valid
    loss, accuracy = validate(
        epoch,
        validation_data_loader,
        model,
        criterion,
        il_config,
        summary_writer=writer,
    )
    # valid game
    sum_rewards = validate_game(model, il_config)
    print(
        'Overall Epoch: %d, valid loss: %.3f, valid accuracy: %.3f, valid sum rewards: %.3f'
        % (epoch, loss, accuracy, sum_rewards)
    )

    # early stopping: no improvement -> bump the patience counter and maybe stop
    improved = loss <= best_valid_loss
    if not improved:
        valid_epoch += 1
        if valid_epoch >= il_config.valid_patience:
            break
        continue

    # improvement: remember the metrics, reset patience and checkpoint the weights
    best_valid_loss = loss
    best_valid_acc = accuracy
    best_valid_reward = sum_rewards
    valid_epoch = 0
    best_epoch = epoch
    torch.save(model.state_dict(), il_config.SAVE_PATH)
    print(
        'Best Epoch: %d, valid loss: %.3f, valid accuracy: %.3f, valid sum rewards: %.3f'
        % (best_epoch, best_valid_loss, best_valid_acc, best_valid_reward)
    )
    print('Trained model saved at ', il_config.SAVE_PATH)

Epoch: 0, train loss: 0.886, train accuracy: 0.694: 100%|█| 2535/2535 [01:53<00:


Overall Epoch: 0, train loss: 1.269, train accuracy: 0.511


Epoch: 0, valid loss: 0.865, valid accuracy: 0.681: 100%|█| 634/634 [00:11<00:00
100%|███████████████████████████████████████████| 15/15 [00:04<00:00,  3.56it/s]


Overall Epoch: 0, valid loss: 1.030, valid accuracy: 0.000, valid sum rewards


## IL with Instructions

In [35]:
# TODO: implement imitation learning that also uses the instructions

---

# Reinforcement Learning (RL) - Fine-tuning

In [34]:
# TODO: implement the reinforcement learning fine-tuning stage