In [1]:
import sys
sys.path.append("../")
from third_party.sentence_classification.train_classifier import train_sentiment
from third_party.sentence_classification.train_classifier import eval_model
from third_party.sentence_classification import dataloader
from third_party.sentence_classification import modules
from quant_embedding import QuantEmbedding
from quant_embedding import quantize_embed
import argparse
import logging
from copy import deepcopy

## Train a model and save checkpoints

In [2]:
import torch

# This notebook needs PyTorch 0.4 or newer. Compare numeric (major, minor)
# components: plain string comparison is lexicographic, so a version such as
# "0.10" would sort before "0.4".
torch_major_minor = tuple(
    int(part) for part in str(torch.__version__).split(".")[:2]
)
assert torch_major_minor >= (0, 4), (
    "PyTorch >= 0.4 is required, found {}".format(torch.__version__)
)

In [3]:
# Argv-style option list handed to train_sentiment. Every token is a string,
# just as it would be on a real command line: argparse-style parsers index
# into each token, so a bare non-zero float here raises TypeError (a literal
# 0.0 only slips through because it is falsy).
cmdlines = ["--dataset", 'trec',
            "--path", "../third_party/sentence_classification/data/",
            "--embedding", "../glove.6B.300d.txt",
            "--cnn",
            "--max_epoch", str(100),
            "--model_seed", str(1),
            "--data_seed", str(1),
            "--lr", str(0.001),
            "--save_mdl", "./model_ckpt",
            "--dropout", str(0.0)]
# train_sentiment returns a (validation error, test error) pair.
err_valid, err_test = train_sentiment(cmdlines)
print(err_valid, err_test)

['--dataset', 'trec', '--path', '../third_party/sentence_classification/data/', '--embedding', '../glove.6B.300d.txt', '--cnn', '--max_epoch', '100', '--model_seed', '1', '--data_seed', '1', '--lr', '0.001', '--save_mdl', './model_ckpt', '--dropout', 0.0]
INFO:root:Embedding hash: b78f53fb56ec1ce9edc367d2e6186ba4
INFO:root:Machine: dawn8.stanford.edu
INFO:root:CMD: python /lfs/1/zjian/anaconda2/envs/pytorch_1.0/lib/python3.6/site-packages/ipykernel_launcher.py -f /run/user/16494/jupyter/kernel-2d2fd2c3-1ff8-4514-ad67-c3a7b174f2c9.json
INFO:root:Command Line Args
INFO:root:cnn            : 1         
INFO:root:lstm           : 0         
INFO:root:la             : 0         
INFO:root:no_normalize   : 0         
INFO:root:dataset        : trec      
INFO:root:path           : ../third_party/sentence_classification/data/
INFO:root:embedding      : ../glove.6B.300d.txt
INFO:root:batch_size     : 32        
INFO:root:max_epoch      : 100       
INFO:root:d              : 128       
INFO:ro

  x, y = Variable(x, volatile=True), Variable(y)


INFO:root:Epoch=1 iter=308 lr=0.001000 train_loss=0.378204 valid_err=0.194139
INFO:root:Epoch=2 iter=462 lr=0.001000 train_loss=0.131971 valid_err=0.164835
INFO:root:Epoch=3 iter=616 lr=0.001000 train_loss=0.061415 valid_err=0.144689
INFO:root:Epoch=4 iter=770 lr=0.001000 train_loss=0.037204 valid_err=0.137363
INFO:root:Epoch=5 iter=924 lr=0.001000 train_loss=0.025701 valid_err=0.133700
INFO:root:Epoch=6 iter=1078 lr=0.001000 train_loss=0.019831 valid_err=0.126374
INFO:root:Epoch=7 iter=1232 lr=0.001000 train_loss=0.014343 valid_err=0.128205
INFO:root:Epoch=8 iter=1386 lr=0.001000 train_loss=0.009624 valid_err=0.124542
INFO:root:Epoch=9 iter=1540 lr=0.001000 train_loss=0.006781 valid_err=0.126374
INFO:root:Epoch=10 iter=1694 lr=0.001000 train_loss=0.004996 valid_err=0.117216
INFO:root:Epoch=11 iter=1848 lr=0.001000 train_loss=0.003826 valid_err=0.122711
INFO:root:Epoch=12 iter=2002 lr=0.001000 train_loss=0.002954 valid_err=0.120879
INFO:root:Epoch=13 iter=2156 lr=0.001000 train_loss=0.

## Load the saved model and data

In [4]:
def parse_args(cmdline_args):
    """Parse the classifier options from an argv-style list.

    Args:
        cmdline_args: Sequence of command-line tokens, e.g.
            ``["--path", "data/", "--cnn"]``. Tokens that are not strings
            (such as ``0.5``) are converted with ``str`` before parsing,
            because argparse only handles string tokens. ``None`` is passed
            through unchanged, in which case argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: one attribute per option declared below.

    Raises:
        SystemExit: raised by argparse for unknown options, values that fail
            type conversion, or a missing ``--path``.
    """
    argparser = argparse.ArgumentParser(sys.argv[0], conflict_handler='resolve')
    argparser.add_argument("--cnn", action='store_true', help="whether to use cnn")
    argparser.add_argument("--dataset", type=str, default="mr", help="which dataset")
    argparser.add_argument("--path", type=str, required=True, help="path to corpus directory")
    argparser.add_argument("--no_normalize", action='store_true', help="Do not normalize embeddings")
    argparser.add_argument("--embedding", type=str, help="word vectors")
    argparser.add_argument("--batch_size", "--batch", type=int, default=32)
    argparser.add_argument("--max_epoch", type=int, default=100)
    argparser.add_argument("--d", type=int, default=128)
    argparser.add_argument("--dropout", type=float, default=0.5)
    argparser.add_argument("--depth", type=int, default=2)
    argparser.add_argument("--lr", type=float, default=0.001)
    argparser.add_argument("--model_seed", type=int, default=1234)
    argparser.add_argument("--data_seed", type=int, default=1234)
    argparser.add_argument("--save_mdl", type=str, default=None, help="Save model to this file.")
    argparser.add_argument("--load_mdl", type=str, default=None, help="Load model from this file.")
    argparser.add_argument("--out", type=str, help="Path to output directory.")
    # Echo the arguments exactly as the caller supplied them.
    print(cmdline_args)
    if cmdline_args is not None:
        # argparse indexes into every token, so a non-string such as 0.5
        # would raise TypeError; normalise all tokens to strings first.
        cmdline_args = [str(token) for token in cmdline_args]
    args = argparser.parse_args(cmdline_args)
    return args

In [5]:
# Options for reloading the saved checkpoint through parse_args. Every token
# is a string, as on a real command line: argparse indexes into each token,
# so a bare non-zero float would raise TypeError (a literal 0.0 only slips
# through because it is falsy).
cmdlines = ["--dataset", 'trec',
            "--path", "../third_party/sentence_classification/data/",
            "--embedding", "../glove.6B.300d.txt",
            "--cnn",
            "--max_epoch", str(25),
            "--model_seed", str(1),
            "--data_seed", str(1),
            "--lr", str(0.001),
            "--load_mdl", "./model_ckpt",
            "--dropout", str(0.0)]
args = parse_args(cmdlines)

['--dataset', 'trec', '--path', '../third_party/sentence_classification/data/', '--embedding', '../glove.6B.300d.txt', '--cnn', '--max_epoch', '25', '--model_seed', '1', '--data_seed', '1', '--lr', '0.001', '--load_mdl', './model_ckpt', '--dropout', 0.0]


In [6]:
# Read the three dataset splits, then pool their inputs; the pooled data is
# what the embedding layer is constructed from.
splits = dataloader.read_split_dataset(args.path, args.dataset)
train_x, train_y, valid_x, valid_y, test_x, test_y = splits
data = train_x + valid_x + test_x

pretrained_embs = dataloader.load_embedding(args.embedding)
emb_layer = modules.EmbeddingLayer(
    args.d,
    data,
    embs=pretrained_embs,
    normalize=not args.no_normalize,
)


def _to_batches(inputs, labels):
    """Batch one split using the shared batch size and word-to-id mapping."""
    return dataloader.create_batches(
        inputs,
        labels,
        args.batch_size,
        emb_layer.word2id,
        sort=args.dataset == 'sst',  # sorting is requested only for 'sst'
    )


# Each split is replaced by its batched form, in train / valid / test order.
train_x, train_y = _to_batches(train_x, train_y)
valid_x, valid_y = _to_batches(valid_x, valid_y)
test_x, test_y = _to_batches(test_x, test_y)

# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code; only load checkpoints from a trusted source.
model = torch.load(args.load_mdl)

INFO:root:Beginning to load embeddings
INFO:root:Finished loading embeddings
INFO:root:400000 pre-trained word embeddings loaded.
INFO:root:Number of vectors: 403717, Number of loaded vectors: 400000, Number of oov 3717
INFO:root:embedding shape: torch.Size([403717, 300])
INFO:root:154 batches, avg len: 21.0
INFO:root:18 batches, avg len: 20.8
INFO:root:16 batches, avg len: 14.4


## Case 1: use quantized model for inference

##### Replace embedding layers with quantized embedding layers

In [7]:
# Compressing the embedding layer can take up to 1 minute.
# Both models are built from independent deep copies of `model`, so `model`
# itself is never passed to quantize_embed or moved to the GPU.
model_comp, model_uncomp = (
    quantize_embed(deepcopy(model), nbit=2).cuda(),
    deepcopy(model).cuda(),
)

INFO:root:Replaced embedding in EmbeddingLayer


##### Perform inference

In [8]:
# Evaluate the quantized model first, then the original one, on the same
# test batches so the two error values can be compared directly.
err_comp, err_uncomp = (
    eval_model(candidate, test_x, test_y)
    for candidate in (model_comp, model_uncomp)
)
print("Quantized model error", err_comp)
print("Original model error", err_uncomp)

Quantized model error 0.09399999999999997
Original model error 0.07799999999999996


## Case 2: use quantized model for training with fixed pretrained embedding

##### Replace embedding layers with quantized embedding layers

In [9]:
# Rebuild both models from fresh deep copies of `model` before training.
model_comp, model_uncomp = (
    quantize_embed(deepcopy(model), nbit=2).cuda(),
    deepcopy(model).cuda(),
)

INFO:root:Replaced embedding in EmbeddingLayer


##### Perform training on layers other than the embeddings

In [10]:
def need_grad(x):
    """Return whether parameter ``x`` has ``requires_grad`` set."""
    return x.requires_grad


# Hand Adam only the parameters of the quantized model that require
# gradients; everything else is left out of the optimizer.
trainable_params = [
    param for param in model_comp.parameters() if need_grad(param)
]
optimizer = torch.optim.Adam(trainable_params, lr=args.lr)
criterion = torch.nn.CrossEntropyLoss()

In [11]:
# One pass over the training batches. `optimizer.step()` updates only the
# parameters that were given to `optimizer`.
for x, y in zip(train_x, train_y):
    # Clear gradients accumulated during the previous batch.
    model_comp.zero_grad()
    # Batches are fed to the model directly: since PyTorch 0.4 tensors and
    # Variables are merged, so the deprecated torch.autograd.Variable wrapper
    # is no longer needed.
    output = model_comp(x)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()