## Change dir

## Import Necessary packages

In [1]:
import argparse
import time
import math
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.onnx
from io import open

In [2]:
data_loc='./' 
model_name='LSTM' #'type of network (RNN_TANH, RNN_RELU, LSTM, GRU, Transformer)')
emsize=200 #'size of word embeddings')
nhid=200 #'number of hidden units per layer')
nlayers=2 #'number of layers')
lr=1 #'initial learning rate')
clip=0.25 #'gradient clipping')
epochs=30 #'upper epoch limit')
batch_size=20  #'batch size')
bptt=35 #'sequence length')
dropout=0.2 #'dropout applied to layers (0 = no dropout)')
seed=1111 #'random seed')
cuda=True #'use CUDA')
tied=False
mps=False#    help='enables macOS GPU training')
log_interval=200  #'report interval')
saved_model='model.pt' #'path to save the final model')
onnx_export='' #'path to export the final model in onnx format')
nhead=2 #'the number of heads in the encoder/decoder of the transformer model')
dry_run=False

In [3]:
torch.manual_seed(seed)
if torch.cuda.is_available():
    if not cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda.")
if cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
class Dictionary(object):
    def __init__(self):
        self.word2idx = {}
        self.idx2word = []

    def add_word(self, word):
        if word not in self.word2idx:
            self.idx2word.append(word)
            self.word2idx[word] = len(self.idx2word) - 1
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)


class Corpus(object):
    def __init__(self, path):
        self.dictionary = Dictionary()
        self.train = self.tokenize(os.path.join(path, 'train_inj.txt'))
        self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
        self.test = self.tokenize(os.path.join(path, 'test.txt'))
    def tokenize_single(self, inp):
        words = inp.split() + ['<eos>']
        ids = []
        for word in words:
          self.dictionary.add_word(word)
          ids.append(self.dictionary.word2idx[word])
        return torch.tensor([ids]).type(torch.int64)
    def tokenize(self, path):
        """Tokenizes a text file."""
        assert os.path.exists(path)
        # Add words to the dictionary
        with open(path, 'r', encoding="utf8") as f:
            for line in f:
                words = line.split() + ['<eos>']
                for word in words:
                    self.dictionary.add_word(word)

        # Tokenize file content
        with open(path, 'r', encoding="utf8") as f:
            idss = []
            for line in f:
                words = line.split() + ['<eos>']
                ids = []
                for word in words:
                    ids.append(self.dictionary.word2idx[word])
                idss.append(torch.tensor(ids).type(torch.int64))
            ids = torch.cat(idss)

        return ids

## Set the random seed manually for reproducibility.

## Load Data

In [5]:
corpus = Corpus(data_loc)

### batchify arranges the dataset into columns

In [6]:
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)

In [7]:
eval_batch_size = 10
train_data = batchify(corpus.train, batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

In [8]:
class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
        super(RNNModel, self).__init__()
        self.ntoken = ntoken
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
        else:
            try:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
            except KeyError:
                raise ValueError( """An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
            self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers

    def init_weights(self):
        initrange = 0.1
        nn.init.uniform_(self.encoder.weight, -initrange, initrange)
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -initrange, initrange)

    def forward(self, input, hidden):
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        decoded = self.decoder(output)
        decoded = decoded.view(-1, self.ntoken)
        return F.log_softmax(decoded, dim=1), hidden

    def init_hidden(self, bsz):
        weight = next(self.parameters())
        if self.rnn_type == 'LSTM':
            return (weight.new_zeros(self.nlayers, bsz, self.nhid),
                    weight.new_zeros(self.nlayers, bsz, self.nhid))
        else:
            return weight.new_zeros(self.nlayers, bsz, self.nhid)

# Temporarily leave PositionalEncoding module here. Will be moved somewhere else.
class PositionalEncoding(nn.Module):
    r"""Inject some information about the relative or absolute position of the tokens in the sequence.
        The positional encodings have the same dimension as the embeddings, so that the two can be summed.
        Here, we use sine and cosine functions of different frequencies.
    .. math:
        \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model))
        \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model))
        \text{where pos is the word position and i is the embed idx)
    Args:
        d_model: the embed dim (required).
        dropout: the dropout value (default=0.1).
        max_len: the max. length of the incoming sequence (default=5000).
    Examples:
        >>> pos_encoder = PositionalEncoding(d_model)
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        r"""Inputs of forward function
        Args:
            x: the sequence fed to the positional encoder model (required).
        Shape:
            x: [sequence length, batch size, embed dim]
            output: [sequence length, batch size, embed dim]
        Examples:
            >>> output = pos_encoder(x)
        """

        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

class TransformerModel(nn.Module):
    """Container module with an encoder, a recurrent or transformer module, and a decoder."""

    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
        super(TransformerModel, self).__init__()
        try:
            from torch.nn import TransformerEncoder, TransformerEncoderLayer
        except:
            raise ImportError('TransformerEncoder module does not exist in PyTorch 1.1 or lower.')
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.ninp = ninp
        self.decoder = nn.Linear(ninp, ntoken)

        self.init_weights()

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        initrange = 0.1
        nn.init.uniform_(self.encoder.weight, -initrange, initrange)
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -initrange, initrange)

    def forward(self, src, has_mask=True):
        if has_mask:
            device = src.device
            if self.src_mask is None or self.src_mask.size(0) != len(src):
                mask = self._generate_square_subsequent_mask(len(src)).to(device)
                self.src_mask = mask
        else:
            self.src_mask = None

        src = self.encoder(src) * math.sqrt(self.ninp)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        output = self.decoder(output)
        return F.log_softmax(output, dim=-1)

## Build the Model

In [9]:
ntokens = len(corpus.dictionary)
if model_name == 'Transformer':
    model = TransformerModel(ntokens, emsize, nhead, nhid, nlayers, dropout).to(device)
else:
    model = RNNModel(model_name, ntokens, emsize, nhid, nlayers, dropout, tied).to(device)

criterion = nn.NLLLoss()

## Training

In [10]:
def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)


def get_batch(source, i):
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i+seq_len]
    target = source[i+1:i+1+seq_len].view(-1)
    return data, target

def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    if model_name != 'Transformer':
        hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i)
            if model_name == 'Transformer':
                output = model(data)
                output = output.view(-1, ntokens)
            else:
                output, hidden = model(data, hidden)
                hidden = repackage_hidden(hidden)
            total_loss += len(data) * criterion(output, targets).item()
    return total_loss / (len(data_source) - 1)

def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if model_name != 'Transformer':
        hidden = model.init_hidden(batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if model_name == 'Transformer':
            output = model(data)
            output = output.view(-1, ntokens)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output, targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        for p in model.parameters():
            p.data.add_(p.grad, alpha=-lr)

        total_loss += loss.item()

        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // bptt, lr,
                elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        if dry_run:
            break


In [11]:
def export_onnx(path, batch_size, seq_len):
    print('The model is also exported in ONNX format at {}.'.format(os.path.realpath(onnx_export)))
    model.eval()
    dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(-1, batch_size).to(device)
    hidden = model.init_hidden(batch_size)
    torch.onnx.export(model, (dummy_input, hidden), path)

# Loop over epochs.

best_val_loss = None

# At any point you can hit Ctrl + C to break out of training early.
try:
    for epoch in range(1, epochs+1):
        epoch_start_time = time.time()
        train()
        val_loss = evaluate(val_data)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                           val_loss, math.exp(val_loss)))
        print('-' * 89)
        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            with open(saved_model, 'wb') as f:
                torch.save(model, f)
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if no improvement has been seen in the validation dataset.
            lr /= 4.0
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')


| epoch   1 |   200/ 2983 batches | lr 1.00 | ms/batch 22.35 | loss  7.94 | ppl  2812.21
| epoch   1 |   400/ 2983 batches | lr 1.00 | ms/batch 16.31 | loss  7.33 | ppl  1524.73
| epoch   1 |   600/ 2983 batches | lr 1.00 | ms/batch 16.36 | loss  7.25 | ppl  1412.36
| epoch   1 |   800/ 2983 batches | lr 1.00 | ms/batch 16.41 | loss  7.21 | ppl  1350.64
| epoch   1 |  1000/ 2983 batches | lr 1.00 | ms/batch 16.45 | loss  7.21 | ppl  1349.28
| epoch   1 |  1200/ 2983 batches | lr 1.00 | ms/batch 16.49 | loss  7.20 | ppl  1341.61
| epoch   1 |  1400/ 2983 batches | lr 1.00 | ms/batch 17.73 | loss  7.15 | ppl  1279.87
| epoch   1 |  1600/ 2983 batches | lr 1.00 | ms/batch 16.95 | loss  7.16 | ppl  1289.71
| epoch   1 |  1800/ 2983 batches | lr 1.00 | ms/batch 16.64 | loss  7.13 | ppl  1249.99
| epoch   1 |  2000/ 2983 batches | lr 1.00 | ms/batch 16.72 | loss  7.14 | ppl  1264.07
| epoch   1 |  2200/ 2983 batches | lr 1.00 | ms/batch 16.78 | loss  7.15 | ppl  1268.41
| epoch   1 |  2400/ 

## Load the best saved model.

In [15]:

with open(saved_model, 'rb') as f:
    model = torch.load(f)
    # after load the rnn params are not a continuous chunk of memory
    # this makes them a continuous chunk, and will speed up forward pass
    # Currently, only rnn model supports flatten_parameters function.
    if model_name in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU']:
        model.rnn.flatten_parameters()


## Run on test data.

In [12]:

test_loss = evaluate(test_data)
print('=' * 89)
print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    test_loss, math.exp(test_loss)))
print('=' * 89)

if len(onnx_export) > 0:
    # Export the model in ONNX format.
    export_onnx(onnx_export, batch_size=1, seq_len=bptt)

| End of training | test loss  5.11 | test ppl   166.31


#Generate Text

In [14]:
# Model parameters.
data_loc='./' #help='location of the data corpus'
checkpoint='./model.pt' #help='model checkpoint to use'
outf='generated.txt' #help='output file for generated text')
words=10 #help='number of words to generate')
seed=1111 #help='random seed')
cuda= True #help='use CUDA')
mps=False #help='enables macOS GPU training')
temperature=0.1 #help='temperature - higher will increase diversity')
log_interval=100 #help='reporting interval')

In [15]:
# Set the random seed manually for reproducibility.
torch.manual_seed(seed)
if torch.cuda.is_available():
    if not cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda.")
if torch.backends.mps.is_available():
    if not mps:
        print("WARNING: You have mps device, to enable macOS GPU run with --mps.")
        
use_mps = mps and torch.backends.mps.is_available()
if cuda:
    device = torch.device("cuda")
elif use_mps:
    device = torch.device("mps")
else:
    device = torch.device("cpu")

if temperature < 1e-3:
    parser.error("--temperature has to be greater or equal 1e-3.")


In [16]:
input = torch.randint(ntokens, (1, 1), dtype=torch.long).to(device)
print('input before: ', input)

input before:  tensor([[18564]], device='cuda:0')


In [27]:
len(input[0])

4

In [29]:
txts=""
with open(checkpoint, 'rb') as f:
    model = torch.load(f, map_location=device)
model.eval()

corpus = Corpus(data_loc)
ntokens = len(corpus.dictionary)

is_transformer_model = hasattr(model, 'model_type') and model.model_type == 'Transformer'
input = corpus.tokenize_single("birth date is") 
input=input.to(device)  #torch.randint(ntokens, (1, 1), dtype=torch.long).to(device)
if not is_transformer_model:
    hidden = model.init_hidden(len(input[0]))

print('input before: ', input)
#with open(outf, 'w') as outf:
with torch.no_grad():  # no tracking history
    for i in range(words):
        if is_transformer_model:
            output = model(input, False)
            word_weights = output[-1].squeeze().div(temperature).exp().cpu()
            word_idx = torch.multinomial(word_weights, 1)[0]
            word_tensor = torch.Tensor([[word_idx]]).long().to(device)
            input = torch.cat([input, word_tensor], 0)
        else:
            output, hidden = model(input, hidden)
            print('output:', output)
            word_weights = output.squeeze().div(temperature).exp().cpu()
            #print('word_weights:', word_weights)
            word_idx = torch.multinomial(word_weights, 1)[0]
            #print('word_idx:', word_idx)
            #input.fill_(word_idx)
            print('input after: ', input)

        word = corpus.dictionary.idx2word[word_idx]
        print('word:',word)
        txts= txts+' '+ word   #outf.write(word + ('\n' if i % 20 == 19 else ' '))

        if i % log_interval == 0:
            print('| Generated {}/{} words'.format(i, words))

input before:  tensor([[963, 962,  26,   0]], device='cuda:0')
output: tensor([[ -5.7322,  -8.5891, -10.0856,  ..., -10.8387, -11.0590, -11.0249],
        [ -5.1870,  -8.2396,  -9.9498,  ..., -10.7094, -11.0175, -11.0859],
        [ -8.2082, -11.7987, -10.6106,  ..., -10.5849, -10.8902, -10.9479],
        [ -5.4247,  -7.7961,  -9.6939,  ..., -10.4860, -10.4168, -10.5533]],
       device='cuda:0')
input after:  tensor([[963, 962,  26,   0]], device='cuda:0')
word: ,
| Generated 0/10 words
output: tensor([[ -5.3058,  -8.2991, -10.8480,  ..., -12.4206, -12.6743, -12.4950],
        [ -4.3283,  -7.6440, -11.0347,  ..., -12.6723, -13.1544, -13.1937],
        [ -9.2419, -13.6867, -11.4608,  ..., -11.2025, -11.6599, -11.9416],
        [ -5.6126,  -7.4718,  -9.3702,  ..., -10.9136, -10.4379, -10.8456]],
       device='cuda:0')
input after:  tensor([[963, 962,  26,   0]], device='cuda:0')
word: ,
output: tensor([[ -5.9430,  -8.9350, -11.9283,  ..., -14.4407, -14.6003, -14.3024],
        [ -5.620

In [30]:
txts

' , , . . . . . . . .'

In [11]:
def generate_txt(inp):
  propmpt1=["birthday", "birthdate"]
  prompt2=["born on", "born in"]
  