In [2]:
from fastai.text import *
import fire

BOS = 'xbos'  # beginning-of-sentence tag
FLD = 'xfld'  # data field tag

BOS_LABEL = '_bos_'
PAD = '_pad_'

re1 = re.compile(r'  +')


def read_file(filepath):
    assert os.path.exists(filepath)
    sentences = []
    labels = []
    with open(filepath, encoding='utf-8') as f:
        sentence = [BOS]
        sentence_labels = [BOS_LABEL]
        for line in f:
            if line == '\n':
                sentences.append(sentence)
                labels.append(sentence_labels)
                sentence = [BOS]  # use xbos as the start of sentence token
                sentence_labels = [BOS_LABEL]
            else:
                sentence.append(line.split()[0].lower())
                # label is generally in the last column
                sentence_labels.append(line.split()[-1])
        if sentence:  # some files, e.g. NER end on an empty line
            sentences.append(sentence)
            labels.append(sentence_labels)
    return sentences, labels


def create_toks(prefix, max_vocab=30000, min_freq=1):
    PATH = f'data/nlp_seq/{prefix}/'

    names = {}
    if prefix == 'ner':
        names['train'] = 'train.txt'
        names['val'] = 'valid.txt'
        names['test'] = 'test.txt'
    else:
        raise ValueError(f'Filenames for {prefix} have to be added first.')
    paths = {}
    for split in ['train', 'val', 'test']:
        paths[split] = f'{PATH}{names[split]}'

    print(f'prefix {prefix} max_vocab {max_vocab} min_freq {min_freq}')

    os.makedirs(f'{PATH}tmp', exist_ok=True)
    trn_tok, trn_labels = read_file(paths['train'])
    val_tok, val_labels = read_file(paths['val'])
    test_tok, test_labels = read_file(paths['test'])

    for trn_t, trn_l in zip(trn_tok[:5], trn_labels[:5]):
        print('Sentence:', trn_t, 'labels:', trn_l)

    print(f'# of train: {len(trn_tok)}, # of val: {len(val_tok)},'
          f'# of test: {len(test_tok)}')

    freq = Counter(p for o in trn_tok for p in o)
    print(freq.most_common(25))
    itos = [o for o, c in freq.most_common(max_vocab) if c > min_freq]
    itos.insert(0, PAD)
    itos.insert(0, '_unk_')
    stoi = collections.defaultdict(lambda: 0,
                                   {v: k for k, v in enumerate(itos)})
    print(len(itos))

    trn_ids = np.array([[stoi[o] for o in p] for p in trn_tok])
    val_ids = np.array([[stoi[o] for o in p] for p in val_tok])
    test_ids = np.array([[stoi[o] for o in p] for p in test_tok])

    # map the labels to ids
    freq = Counter(p for o in trn_labels for p in o)
    print(freq)
    itol = [l for l, c in freq.most_common()]
    itol.insert(1, PAD)  # insert padding label at index 1
    print(itol)
    ltoi = {l: i for i, l in enumerate(itol)}
    trn_lbl_ids = np.array([[ltoi[o] for o in p] for p in trn_labels])
    val_lbl_ids = np.array([[ltoi[o] for o in p] for p in val_labels])
    test_lbl_ids = np.array([[ltoi[o] for o in p] for p in test_labels])

    ids_joined = np.array([[stoi[o] for o in p] for p in trn_tok + val_tok + test_tok])
    val_ids_joined = ids_joined[int(len(ids_joined)*0.9):]
    ids_joined = ids_joined[:int(len(ids_joined)*0.9)]

    np.save(f'{PATH}tmp/trn_ids.npy', trn_ids)
    np.save(f'{PATH}tmp/val_ids.npy', val_ids)
    np.save(f'{PATH}tmp/test_ids.npy', test_ids)
    np.save(f'{PATH}tmp/lbl_trn.npy', trn_lbl_ids)
    np.save(f'{PATH}tmp/lbl_val.npy', val_lbl_ids)
    np.save(f'{PATH}tmp/lbl_test.npy', test_lbl_ids)
    pickle.dump(itos, open(f'{PATH}tmp/itos.pkl', 'wb'))
    pickle.dump(itol, open(f'{PATH}tmp/itol.pkl', 'wb'))
    np.save(f'{PATH}tmp/trn_lm_ids.npy', ids_joined)
    np.save(f'{PATH}tmp/val_lm_ids.npy', val_ids_joined)


In [3]:
create_toks('ner')

prefix ner max_vocab 30000 min_freq 1
Sentence: ['xbos', '-docstart-'] labels: ['_bos_', 'O']
Sentence: ['xbos', 'eu', 'rejects', 'german', 'call', 'to', 'boycott', 'british', 'lamb', '.'] labels: ['_bos_', 'I-ORG', 'O', 'I-MISC', 'O', 'O', 'O', 'I-MISC', 'O', 'O']
Sentence: ['xbos', 'peter', 'blackburn'] labels: ['_bos_', 'I-PER', 'I-PER']
Sentence: ['xbos', 'brussels', '1996-08-22'] labels: ['_bos_', 'I-LOC', 'O']
Sentence: ['xbos', 'the', 'european', 'commission', 'said', 'on', 'thursday', 'it', 'disagreed', 'with', 'german', 'advice', 'to', 'consumers', 'to', 'shun', 'british', 'lamb', 'until', 'scientists', 'determine', 'whether', 'mad', 'cow', 'disease', 'can', 'be', 'transmitted', 'to', 'sheep', '.'] labels: ['_bos_', 'O', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'I-MISC', 'O', 'O', 'O', 'O', 'O', 'I-MISC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
# of train: 14988, # of val: 3468,# of test: 3686
[('xbos', 14988), ('the', 8390), ('.', 7374), (

In [1]:
% reload_ext autoreload
% autoreload 2
% matplotlib inline

In [2]:
import fire
from fastai.text import *
from fastai.lm_rnn import *

from eval import eval_ner


def freeze_all_but(learner, n):
    c=learner.get_layer_groups()
    for l in c: set_trainable(l, False)
    set_trainable(c[n], True)


def get_rnn_seq_labeler(bptt, max_seq, n_class, n_tok, emb_sz, n_hid, n_layers, pad_token, layers, drops, bidir=False,
                      dropouth=0.3, dropouti=0.5, dropoute=0.1, wdrop=0.5):
    rnn_enc = MultiBatchSeqRNN(bptt, max_seq, n_tok, emb_sz, n_hid, n_layers, pad_token=pad_token, bidir=bidir,
                      dropouth=dropouth, dropouti=dropouti, dropoute=dropoute, wdrop=wdrop)
    # return SequentialRNN(rnn_enc, LinearBlocks(layers, drops))
    return SequentialRNN(rnn_enc, LinearDecoder(n_class, emb_sz, 0.1))


class MultiBatchSeqRNN(RNN_Encoder):
    def __init__(self, bptt, max_seq, *args, **kwargs):
        self.max_seq,self.bptt = max_seq,bptt
        super().__init__(*args, **kwargs)

    def concat(self, arrs):
        return [torch.cat([l[si] for l in arrs]) for si in range(len(arrs[0]))]

    def forward(self, input):
        sl,bs = input.size()
        for l in self.hidden:
            for h in l: h.data.zero_()
        # raw_outputs, outputs = [],[]
        raw_outputs, outputs = super().forward(input)
        # for i in range(0, sl, self.bptt):
        #     r, o = super().forward(input[i: min(i+self.bptt, sl)])
        #     if i>(sl-self.max_seq):
        #         raw_outputs.append(r)
        #         outputs.append(o)
        # return self.concat(raw_outputs), self.concat(outputs)
        return raw_outputs, outputs


class SeqDataLoader(DataLoader):
    def get_batch(self, indices):
        res = self.np_collate([self.dataset[i] for i in indices])
        # res = self.np_collate([self.dataset[i] for i in indices], self.pad_idx)
        # if not self.transpose: return res
        # res[0] = res[0].T
        # print('First seq:', res[0][0])
        # print('First labels:', res[1][0])
        res[1] = np.reshape(res[1], -1)  # reshape the labels to one sequence
        return res


class TextSeqDataset(Dataset):
    def __init__(self, x, y, backwards=False, sos=None, eos=None):
        self.x,self.y,self.backwards,self.sos,self.eos = x,y,backwards,sos,eos

    def __getitem__(self, idx):
        x = self.x[idx]
        y = self.y[idx]  # we need to get y as array
        if self.backwards: x = list(reversed(x))
        if self.eos is not None: x = x + [self.eos]
        if self.sos is not None: x = [self.sos]+x
        return np.array(x),np.array(y)

    def __len__(self): return len(self.x)


def train_seq(dir_path, cuda_id, lm_id='', clas_id=None, bs=64, cl=1, backwards=False, startat=0, unfreeze=True,
              lr=0.01, dropmult=1.0, pretrain=True, bpe=False, use_clr=True,
              use_regular_schedule=False, use_discriminative=True, last=False, chain_thaw=False,
              from_scratch=False, train_file_id=''):
    print(f'prefix {dir_path}; cuda_id {cuda_id}; lm_id {lm_id}; clas_id {clas_id}; bs {bs}; cl {cl}; backwards {backwards}; '
        f'dropmult {dropmult} unfreeze {unfreeze} startat {startat}; pretrain {pretrain}; bpe {bpe}; use_clr {use_clr};'
        f'use_regular_schedule {use_regular_schedule}; use_discriminative {use_discriminative}; last {last};'
        f'chain_thaw {chain_thaw}; from_scratch {from_scratch}; train_file_id {train_file_id}')

    if not hasattr(torch._C, '_cuda_setDevice'):
        print('CUDA not available. Setting device=-1.')
        cuda_id = -1
    torch.cuda.set_device(cuda_id)
    PRE = 'bwd_' if backwards else 'fwd_'
    PRE = 'bpe_' + PRE if bpe else PRE
    IDS = 'bpe' if bpe else 'ids'
    dir_path = Path(dir_path)
    train_file_id = train_file_id if train_file_id == '' else f'_{train_file_id}'
    lm_id = lm_id if lm_id == '' else f'{lm_id}_'
    clas_id = lm_id if clas_id is None else clas_id
    clas_id = clas_id if clas_id == '' else f'{clas_id}_'
    lm_file = '/fs-object-detection/paperspace/fastai/courses/coNLL/data/models/lm1_enc' # there is changed by Emrys
    
    lm_path = dir_path / 'models' / f'{lm_file}.h5'
    if not from_scratch:
        assert lm_path.exists(), f'Error: {lm_path} does not exist.'
    bptt,em_sz,nh,nl = 70,400,1150,3
#     bptt, em_sz, nh, nl = 70, 100, 100, 2

    opt_fn = partial(optim.Adam, betas=(0.8, 0.99))

    if backwards:
        trn_sent = np.load(dir_path / 'tmp' / f'trn_{IDS}{train_file_id}_bwd.npy')
        val_sent = np.load(dir_path / 'tmp' / f'val_{IDS}_bwd.npy')
        test_sent = np.load(dir_path / 'tmp' / f'test_{IDS}_bwd.npy')
    else:
        trn_sent = np.load(dir_path / 'tmp' / f'trn_{IDS}{train_file_id}.npy')
        val_sent = np.load(dir_path / 'tmp' / f'val_{IDS}.npy')
        test_sent = np.load(dir_path / 'tmp' / f'test_{IDS}.npy')

    trn_lbls = np.load(dir_path / 'tmp' / f'lbl_trn{train_file_id}.npy')
    val_lbls = np.load(dir_path / 'tmp' / f'lbl_val.npy')
    test_lbls = np.load(dir_path / 'tmp' / f'lbl_test.npy')
    id2label = pickle.load(open(dir_path / 'tmp' / 'itol.pkl', 'rb'))
    c = len(id2label)

    if bpe:
        vs=30002
    else:
        id2token = pickle.load(open(dir_path / 'tmp' / 'itos.pkl', 'rb'))
        vs = len(id2token)

    print('Train sentences shape:', trn_sent.shape)
    print('Train labels shape:', trn_lbls.shape)
    print('Token ids:', [id2token[id_] for id_ in trn_sent[0]])
    print('Label ids:', [id2label[id_] for id_ in trn_lbls[0]])

    trn_ds = TextSeqDataset(trn_sent, trn_lbls)
    val_ds = TextSeqDataset(val_sent, val_lbls)
    test_ds = TextSeqDataset(test_sent, test_lbls)
    trn_samp = SortishSampler(trn_sent, key=lambda x: len(trn_sent[x]), bs=bs//2)
    val_samp = SortSampler(val_sent, key=lambda x: len(val_sent[x]))
    test_samp = SortSampler(test_sent, key=lambda x: len(test_sent[x]))
    trn_dl = SeqDataLoader(trn_ds, bs//2, transpose=False, num_workers=1, pad_idx=1, sampler=trn_samp)  # TODO why transpose? Should we also transpose the labels?
    val_dl = SeqDataLoader(val_ds, bs, transpose=False, num_workers=1, pad_idx=1, sampler=val_samp)
    test_dl = SeqDataLoader(test_ds, bs, transpose=False, num_workers=1, pad_idx=1, sampler=test_samp)
    md = ModelData(dir_path, trn_dl, val_dl, test_dl)

    dps = np.array([0.4,0.5,0.05,0.3,0.4])*dropmult
    #dps = np.array([0.5, 0.4, 0.04, 0.3, 0.6])*dropmult
    #dps = np.array([0.65,0.48,0.039,0.335,0.34])*dropmult
    #dps = np.array([0.6,0.5,0.04,0.3,0.4])*dropmult

    m = get_rnn_seq_labeler(bptt, 20*70, c, vs, emb_sz=em_sz, n_hid=nh, n_layers=nl, pad_token=1,
              layers=[em_sz, 50, c], drops=[dps[4], 0.1],
              dropouti=dps[0], wdrop=dps[1], dropoute=dps[2], dropouth=dps[3])

    learn = RNN_Learner(md, TextModel(to_gpu(m)), opt_fn=opt_fn)
    learn.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
    learn.clip=25.
    learn.metrics = [accuracy]

    lrm = 2.6
    if use_discriminative:
#         lrs = np.array([lr/(lrm**3), lr/(lrm**2), lr/lrm, lr])
        ## Emrys
        lrs = np.array([lr/(lrm**4), lr/(lrm**3), lr/(lrm**2), lr/lrm, lr])
        ## end
    else:
        lrs = lr
    wd = 1e-6
    if not from_scratch:
        print(f'Loading encoder from {lm_file}...')
        learn.load_encoder(lm_file)
    else:
        print('Training classifier from scratch. LM encoder is not loaded.')
        use_regular_schedule = True

    if (startat<1) and pretrain and not last and not chain_thaw and not from_scratch:
        learn.freeze_to(-1)
        learn.fit(lrs, 1, wds=wd, cycle_len=None if use_regular_schedule else 1,
                  use_clr=None if use_regular_schedule or not use_clr else (8,3))
        learn.freeze_to(-2)
        learn.fit(lrs, 1, wds=wd, cycle_len=None if use_regular_schedule else 1,
                  use_clr=None if use_regular_schedule or not use_clr else (8, 3))
        learn.save(f'{PRE}{clas_id}clas_0')
    elif startat==1:
        learn.load(f'{PRE}{clas_id}clas_0')

    if chain_thaw:
        lrs = np.array([0.0001, 0.0001, 0.0001, 0.001])
        ## Emrys
        lrm = 2.6
        lrs = np.array([lr/(lrm**4), lr/(lrm**3), lr/(lrm**2), lr/lrm, lr])
        # end
        print('Using chain-thaw. Unfreezing all layers one at a time...')
        n_layers = len(learn.get_layer_groups())
        print('# of layers:', n_layers)
        # fine-tune last layer
        learn.freeze_to(-1)
        print('Fine-tuning last layer...')
        learn.fit(lrs, 1, wds=wd, cycle_len=None if use_regular_schedule else 1,
                  use_clr=None if use_regular_schedule or not use_clr else (8,3))
        n = 0
        # fine-tune all layers up to the second-last one
        while n < n_layers-1:
            print('Fine-tuning layer #%d.' % n)
            freeze_all_but(learn, n)
            learn.fit(lrs, 1, wds=wd, cycle_len=None if use_regular_schedule else 1,
                      use_clr=None if use_regular_schedule or not use_clr else (8,3))
            n += 1

    if unfreeze:
        learn.unfreeze()
    else:
        learn.freeze_to(-3)

    if last:
        print('Fine-tuning only the last layer...')
        learn.freeze_to(-1)

    if use_regular_schedule:
        print('Using regular schedule. Setting use_clr=None, n_cycles=cl, cycle_len=None.')
        use_clr = None
        n_cycles = cl
        cl = None
    else:
        n_cycles = 1
    learn.fit(lrs, n_cycles, wds=wd, cycle_len=cl, use_clr=(8,8) if use_clr else None)
    print('Plotting lrs...')
    learn.sched.plot_lr()
    learn.save(f'{PRE}{clas_id}clas_1')

    eval_ner(learn, id2label, is_test=False)
    eval_ner(learn, id2label, is_test=True)

# if __name__ == '__main__': fire.Fire(train_seq)

  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  from numpy.core.umath_tests import inner1d
  return f(*args, **kwds)
  return f(*args, **kwds)


AttributeError: module 'torch.nn.init' has no attribute 'normal_'

In [None]:
train_seq('/fs-object-detection/paperspace/fastai/courses/coNLL/data/nlp_seq/ner/', 0, lm_id='', clas_id=None, bs=64, cl=1, backwards=False, startat=0, unfreeze=True,
              lr=0.01, dropmult=1.0, pretrain=True, bpe=False, use_clr=True,
              use_regular_schedule=False, use_discriminative=True, last=False, chain_thaw=True,
              from_scratch=False, train_file_id='')

In [9]:
class SeqDataLoader(DataLoader):
    def get_batch(self, indices):
        res = self.np_collate([self.dataset[i] for i in indices])
        # res = self.np_collate([self.dataset[i] for i in indices], self.pad_idx)
        # if not self.transpose: return res
        # res[0] = res[0].T
        # print('First seq:', res[0][0])
        # print('First labels:', res[1][0])
        res[1] = np.reshape(res[1], -1)  # reshape the labels to one sequence
        return res


class TextSeqDataset(Dataset):
    def __init__(self, x, y, backwards=False, sos=None, eos=None):
        self.x,self.y,self.backwards,self.sos,self.eos = x,y,backwards,sos,eos

    def __getitem__(self, idx):
        x = self.x[idx]
        y = self.y[idx]  # we need to get y as array
        if self.backwards: x = list(reversed(x))
        if self.eos is not None: x = x + [self.eos]
        if self.sos is not None: x = [self.sos]+x
        return np.array(x),np.array(y)

    def __len__(self): return len(self.x)

dir_path = Path('/fs-object-detection/paperspace/fastai/courses/coNLL/data/nlp_seq/ner/')
bs=64 # batch_size = 2
trn_sent = np.load(dir_path / 'tmp' / f'trn_ids.npy')
val_sent = np.load(dir_path / 'tmp' / f'val_ids.npy')
test_sent = np.load(dir_path / 'tmp' / f'test_ids.npy')

trn_lbls = np.load(dir_path / 'tmp' / f'lbl_trn.npy')
val_lbls = np.load(dir_path / 'tmp' / f'lbl_val.npy')
test_lbls = np.load(dir_path / 'tmp' / f'lbl_test.npy')
id2label = pickle.load(open(dir_path / 'tmp' / 'itol.pkl', 'rb'))
c = len(id2label) + 2 # for start tag and end tag



id2token = pickle.load(open(dir_path / 'tmp' / 'itos.pkl', 'rb'))
vs = len(id2token)

print('Train sentences shape:', trn_sent.shape)
print('Train labels shape:', trn_lbls.shape)
print('Token ids:', [id2token[id_] for id_ in trn_sent[0]])
print('Label ids:', [id2label[id_] for id_ in trn_lbls[0]])

Train sentences shape: (14988,)
Train labels shape: (14988,)
Token ids: ['xbos', '-docstart-']
Label ids: ['_bos_', 'O']


In [10]:
trn_ds = TextSeqDataset(trn_sent, trn_lbls)
val_ds = TextSeqDataset(val_sent, val_lbls)
test_ds = TextSeqDataset(test_sent, test_lbls)
trn_samp = SortishSampler(trn_sent, key=lambda x: len(trn_sent[x]), bs=bs//2)
val_samp = SortSampler(val_sent, key=lambda x: len(val_sent[x]))
test_samp = SortSampler(test_sent, key=lambda x: len(test_sent[x]))
trn_dl = SeqDataLoader(trn_ds, bs//2, transpose=False, num_workers=1, pad_idx=1, sampler=trn_samp)  # TODO why transpose? Should we also transpose the labels?
val_dl = SeqDataLoader(val_ds, bs, transpose=False, num_workers=1, pad_idx=1, sampler=val_samp)
test_dl = SeqDataLoader(test_ds, bs, transpose=False, num_workers=1, pad_idx=1, sampler=test_samp)
md = ModelData('/fs-object-detection/paperspace/fastai/courses/coNLL/data', trn_dl, val_dl, test_dl)

In [12]:
next(iter(trn_dl))[1]


 2
 0
 0
⋮ 
 0
 5
 0
[torch.cuda.LongTensor of size 3648 (GPU 0)]

In [31]:
x = torch.randn(3,4)

In [32]:
V(x)[(V(x) > 0.5)]

Variable containing:
 1.7632
 1.1393
 0.8263
[torch.cuda.FloatTensor of size 3 (GPU 0)]

In [64]:
for i,j in md.trn_dl.dataset:
    if len(i)!=len(j):
        print(False)

In [82]:
for i,j in iter(md.trn_dl):
    print(i.view(-1).size(), j.size())

torch.Size([3648]) torch.Size([3648])
torch.Size([192]) torch.Size([192])
torch.Size([256]) torch.Size([256])
torch.Size([384]) torch.Size([384])
torch.Size([1088]) torch.Size([1088])
torch.Size([64]) torch.Size([64])
torch.Size([704]) torch.Size([704])
torch.Size([288]) torch.Size([288])
torch.Size([1056]) torch.Size([1056])
torch.Size([544]) torch.Size([544])
torch.Size([288]) torch.Size([288])
torch.Size([544]) torch.Size([544])
torch.Size([512]) torch.Size([512])
torch.Size([96]) torch.Size([96])
torch.Size([1056]) torch.Size([1056])
torch.Size([1024]) torch.Size([1024])
torch.Size([576]) torch.Size([576])
torch.Size([544]) torch.Size([544])
torch.Size([1408]) torch.Size([1408])
torch.Size([1312]) torch.Size([1312])
torch.Size([576]) torch.Size([576])
torch.Size([544]) torch.Size([544])
torch.Size([160]) torch.Size([160])
torch.Size([256]) torch.Size([256])
torch.Size([288]) torch.Size([288])
torch.Size([256]) torch.Size([256])
torch.Size([160]) torch.Size([160])
torch.Size([192]) 

torch.Size([800]) torch.Size([800])
torch.Size([672]) torch.Size([672])
torch.Size([416]) torch.Size([416])
torch.Size([384]) torch.Size([384])
torch.Size([288]) torch.Size([288])
torch.Size([96]) torch.Size([96])
torch.Size([160]) torch.Size([160])
torch.Size([1088]) torch.Size([1088])
torch.Size([1056]) torch.Size([1056])
torch.Size([160]) torch.Size([160])
torch.Size([1792]) torch.Size([1792])
torch.Size([1312]) torch.Size([1312])
torch.Size([864]) torch.Size([864])
torch.Size([2176]) torch.Size([2176])
torch.Size([1440]) torch.Size([1440])
torch.Size([384]) torch.Size([384])
torch.Size([352]) torch.Size([352])
torch.Size([288]) torch.Size([288])
torch.Size([1152]) torch.Size([1152])
torch.Size([1120]) torch.Size([1120])
torch.Size([640]) torch.Size([640])
torch.Size([608]) torch.Size([608])
torch.Size([384]) torch.Size([384])
torch.Size([384]) torch.Size([384])
torch.Size([288]) torch.Size([288])
torch.Size([1312]) torch.Size([1312])
torch.Size([1216]) torch.Size([1216])
torch.Size

In [69]:
?? DataLoader

In [84]:
torch.randn(1,2,3,4).size(-1)

4

In [5]:
def get_crf_scrf_label():
    SCRF_l_map = {}
    SCRF_l_map['PER'] = 0
    SCRF_l_map['LOC'] = 1
    SCRF_l_map['ORG'] = 2
    SCRF_l_map['MISC'] = 3
    CRF_l_map = {}
    for pre in ['S-', 'B-', 'I-', 'E-']:
        for suf in SCRF_l_map.keys():
            CRF_l_map[pre + suf] = len(CRF_l_map)
    SCRF_l_map['<START>'] = 4
    SCRF_l_map['<STOP>'] = 5
    SCRF_l_map['O'] = 6
    CRF_l_map['<start>'] = len(CRF_l_map)
    CRF_l_map['<pad>'] = len(CRF_l_map)
    CRF_l_map['O'] = len(CRF_l_map)

    return CRF_l_map, SCRF_l_map


In [6]:
crf_table, scrf_table = get_crf_scrf_label()

In [7]:
crf_table

{'<pad>': 17,
 '<start>': 16,
 'B-LOC': 5,
 'B-MISC': 7,
 'B-ORG': 6,
 'B-PER': 4,
 'E-LOC': 13,
 'E-MISC': 15,
 'E-ORG': 14,
 'E-PER': 12,
 'I-LOC': 9,
 'I-MISC': 11,
 'I-ORG': 10,
 'I-PER': 8,
 'O': 18,
 'S-LOC': 1,
 'S-MISC': 3,
 'S-ORG': 2,
 'S-PER': 0}

In [8]:
scrf_table

{'<START>': 4, '<STOP>': 5, 'LOC': 1, 'MISC': 3, 'O': 6, 'ORG': 2, 'PER': 0}

In [53]:
(np.arange(10).reshape(2,5) > np.ones((2,5))).mean()

0.8

In [46]:
np.ones((2,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [56]:
(np.array([[1,1],[2,2]])==np.array([[1,1],[1,1]]))

array([[ True,  True],
       [False, False]])

In [57]:
V(np.arange(10).reshape(2,5)).data.cpu().numpy()

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])