In [30]:
import mxnet as mx
from mxnet import gluon as gl
from mxnet import initializer as init
from mxnet.gluon.data import ArrayDataset, DataLoader
import numpy as np
import random
import sys
import time
import datetime
from data import load_imdb
import LoggerYN as YN
import tensorflow as tf
import unicodedata
from sklearn.model_selection import train_test_split
import re
import os
from data import load_ptb, load_ptb_vocab


In [31]:
def run_imdb(n_epochs):
    
    np_load_old = np.load

    # Make np.load default to allow_pickle=True (presumably required by the
    # archive that load_imdb reads -- TODO confirm).
    # setdefault() is used instead of forcing the keyword: forcing it makes an
    # already-wrapped np.load (this cell executed twice in one kernel) or a
    # caller that passes allow_pickle explicitly fail with
    # "TypeError: got multiple values for keyword argument 'allow_pickle'".
    # NOTE(review): the patch is process-wide and never undone; unpickling is
    # only safe for trusted data files.
    def _np_load_allow_pickle(*a, **k):
        k.setdefault('allow_pickle', True)
        return np_load_old(*a, **k)

    np.load = _np_load_allow_pickle
    
    class ImdbDataset(ArrayDataset):
        """ArrayDataset of (zero-padded sequence, original length, label) triples.

        The raw data comes from load_imdb(train, vocabulary_size, seq_len);
        presumably it returns token-id sequences and their labels -- verify
        against the data module.
        """

        def __init__(self, train, vocabulary_size, seq_len):
            sequences, labels = load_imdb(train, vocabulary_size, seq_len)

            # Capture the unpadded lengths first; padding below hides them.
            true_lengths = mx.nd.array([len(seq) for seq in sequences])

            # Right-pad each sequence with zeros so every row has seq_len entries.
            padded_rows = []
            for seq in sequences:
                missing = seq_len - len(seq)
                padded_rows.append(np.pad(seq, (0, missing), 'constant'))
            tokens = mx.nd.array(padded_rows, dtype=int)

            super().__init__(tokens, true_lengths, mx.nd.array(labels))


    # In[3]:


    class ImdbLstm(gl.Block):
        """Embedding -> unrolled LSTM cell -> single-unit dense layer."""

        def __init__(self, vocabulary_size, embedding_dim, hidden_size):
            super().__init__()
            with self.name_scope():
                self.embed = gl.nn.Embedding(
                    input_dim=vocabulary_size,
                    output_dim=embedding_dim,
                    weight_initializer=init.Uniform(1.0),
                )
                self.lstm = gl.rnn.LSTMCell(
                    input_size=embedding_dim,
                    hidden_size=hidden_size,
                    i2h_weight_initializer=init.Xavier(),
                    h2h_weight_initializer=init.Orthogonal(),
                )
                self.fc = gl.nn.Dense(
                    in_units=hidden_size,
                    units=1,
                    weight_initializer=init.Xavier(),
                )

        def forward(self, inputs):
            """Return one flat score per sample for an (tokens, lengths) pair."""
            tokens, valid_lens = inputs
            embedded = self.embed(tokens)
            # Unroll over axis 1 of the embedded batch; valid_length is passed
            # so the cell can account for each sample's real length.
            n_steps = embedded.shape[1]
            _, (last_h, _) = self.lstm.unroll(n_steps, embedded, valid_length=valid_lens)
            scores = self.fc(last_h)
            return scores.reshape(-1)


    # In[4]:


    def imdb_train(model, data_loader, criterion, trainer, epoch, print_every=100):
        """Run one pass over data_loader, updating the model after every batch.

        The mean loss of the batches seen since the last report is printed
        every `print_every` batches.
        """
        window = []
        for step, (seqs, lens, labels) in enumerate(data_loader, start=1):
            with mx.autograd.record():
                batch_loss = criterion(model((seqs, lens)), labels)
                batch_loss.backward()
            trainer.step(batch_size=labels.shape[0])

            window.append(batch_loss.mean().asscalar())
            if step % print_every != 0:
                continue
            print('[%d, %5d] train loss: %.3f' % (epoch, step, np.mean(window)))
            window = []


    def imdb_test(model, data_loader, criterion, epoch):
        """Evaluate the model on data_loader and print mean loss and accuracy (%)."""
        metric = mx.metric.Accuracy()
        batch_losses = []
        for seqs, lens, labels in data_loader:
            scores = model((seqs, lens))
            batch_losses.append(mx.nd.mean(criterion(scores, labels)).asscalar())
            # A non-negative raw score is counted as a positive prediction.
            metric.update(labels, scores >= 0.0)

        mean_loss = np.mean(batch_losses)
        acc_value = metric.get()[1]
        print('[%d] test loss: %.3f' % (epoch, mean_loss))
        print('[%d] accuracy: %.3f' % (epoch, acc_value * 100))


    def imdb_run(n_epochs,vocabulary_size, seq_len, batch_size, embedding_size, hidden_size,
                 max_seconds=86400):
        """Train and evaluate the IMDB LSTM, checkpointing along the way.

        Args:
            n_epochs: maximum number of epochs to run.
            vocabulary_size: vocabulary size given to the dataset and the embedding.
            seq_len: length every sequence is padded to.
            batch_size: mini-batch size of both data loaders.
            embedding_size: embedding dimension.
            hidden_size: LSTM hidden state size.
            max_seconds: wall-clock budget; no new epoch is started once it is
                exceeded (default 86400 seconds = 24 hours).
        """
        mx.random.seed(1)
        np.random.seed(1)
        random.seed(1)

        train_dataset = ImdbDataset(train=True, vocabulary_size=vocabulary_size, seq_len=seq_len)
        test_dataset = ImdbDataset(train=False, vocabulary_size=vocabulary_size, seq_len=seq_len)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size)

        model = ImdbLstm(vocabulary_size, embedding_size, hidden_size)
        model.initialize()
        criterion = gl.loss.SigmoidBCELoss()
        trainer = gl.Trainer(model.collect_params(), mx.optimizer.Adam())

        params_file = 'MXNet_CPU_IMDB_LSTM_model'

        memT, cpuT, gpuT = YN.StartLogger("MXNet_CPU", "IMDB")
        start = time.time()
        time_consumed = 0.0
        epoch = 1

        try:
            while time_consumed <= max_seconds and epoch <= n_epochs:
                imdb_train(model, train_loader, criterion, trainer, epoch)
                imdb_test(model, test_loader, criterion, epoch)
                time_consumed = time.time() - start
                print("Time since beginning ", str(datetime.timedelta(seconds=time_consumed)) )
                sys.stdout.flush()
                # Checkpoint on the epoch that just finished (10, 20, ...);
                # testing after the increment would save after 9, 19, ...
                if epoch % 10 == 0:
                    model.save_parameters(params_file)
                epoch += 1
        finally:
            # Always stop the logger started above, even if training raised.
            end = time.time()
            YN.EndLogger(memT, cpuT, gpuT)

        print("\n\nTotal Time Consumed ", str(datetime.timedelta(seconds=end - start)))
        # Persist the final weights so the last epochs are not lost when
        # training stops between two periodic checkpoints.
        if epoch > 1:
            model.save_parameters(params_file)


    # In[5]:


    # Launch the IMDB benchmark with its fixed hyper-parameters.
    imdb_run(
        n_epochs,
        vocabulary_size=5000,
        seq_len=500,
        batch_size=64,
        embedding_size=32,
        hidden_size=100,
    )


    # In[ ]:






In [32]:
def run_manythings(n_epochs):
    
    # Converts the unicode file to ascii
    def unicode_to_ascii(s):
        """Return `s` NFD-normalized with all 'Mn' (nonspacing mark) characters removed."""
        decomposed = unicodedata.normalize('NFD', s)
        kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
        return ''.join(kept)


    def preprocess_sentence(w):
        """Normalize one sentence and wrap it in '<start>' / '<end>' markers."""
        text = unicode_to_ascii(w.lower().strip())

        # Applied in order:
        #  1. surround punctuation with spaces, eg: "he is a boy." => "he is a boy . "
        #     Reference:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation
        #  2. collapse runs of spaces (the character class also matches '"')
        #  3. replace everything except (a-z, A-Z, ".", "?", "!", ",", "¿") with a space
        substitutions = (
            (r"([?.!,¿])", r" \1 "),
            (r'[" "]+', " "),
            (r"[^a-zA-Z?.!,¿]+", " "),
        )
        for pattern, replacement in substitutions:
            text = re.sub(pattern, replacement, text)

        # The start and end tokens tell the model when to start and stop predicting.
        return '<start> ' + text.strip() + ' <end>'

    # 1. Remove the accents
    # 2. Clean the sentences
    # 3. Return word pairs in the format: [ENGLISH, SPANISH]
    def create_dataset(path, num_examples=100000):
        """Read a tab-separated sentence file and return preprocessed word pairs.

        Args:
            path: path of a UTF-8 text file with one tab-separated entry per line.
            num_examples: maximum number of lines to use (default 100000, the
                previously hard-coded limit); None uses every line.

        Returns:
            A list with one inner list per line, holding each tab-separated
            field after preprocess_sentence().
        """
        # Context manager so the file handle is closed deterministically.
        with open(path, encoding='UTF-8') as f:
            lines = f.read().strip().split('\n')

        return [[preprocess_sentence(w) for w in l.split('\t')] for l in lines[:num_examples]]

    # This class creates a word -> index mapping (e.g., "dad" -> 5) and vice-versa
    # (e.g., 5 -> "dad") for each language.
    class LanguageIndex():
        """Vocabulary built from an iterable of phrases.

        After construction `vocab` is the sorted list of space-separated
        tokens, `word2idx` maps '<pad>' to 0 and every token to its 1-based
        rank, and `idx2word` is the reverse mapping.
        """

        def __init__(self, lang):
            self.lang = lang
            self.word2idx = {}
            self.idx2word = {}
            self.vocab = set()

            self.create_index()

        def create_index(self):
            """Populate vocab, word2idx and idx2word from self.lang."""
            for sentence in self.lang:
                for token in sentence.split(' '):
                    self.vocab.add(token)
            self.vocab = sorted(self.vocab)

            # Index 0 is reserved for padding; real tokens start at 1.
            self.word2idx['<pad>'] = 0
            self.word2idx.update(
                (token, position) for position, token in enumerate(self.vocab, start=1)
            )
            self.idx2word.update(
                (position, token) for token, position in self.word2idx.items()
            )
    def max_length(tensor):
        """Return the length of the longest element of `tensor`."""
        return max(map(len, tensor))


    def load_dataset(path):
        """Build padded index matrices and vocabularies from the sentence file.

        Returns:
            (input_tensor, target_tensor, inp_lang, targ_lang,
             max_length_inp, max_length_tar); the input side is built from
            the second element of each pair, the target side from the first.
        """
        # Cleaned [first, second] sentence pairs.
        pairs = create_dataset(path)

        # One vocabulary per language.
        inp_lang = LanguageIndex(sp for _, sp in pairs)
        targ_lang = LanguageIndex(en for en, _ in pairs)

        def vectorize(sentence, index):
            # Map each space-separated token to its vocabulary id.
            return [index.word2idx[token] for token in sentence.split(' ')]

        input_tensor = [vectorize(sp, inp_lang) for _, sp in pairs]
        target_tensor = [vectorize(en, targ_lang) for en, _ in pairs]

        # Pad up to the longest sentence found on each side.
        max_length_inp = max_length(input_tensor)
        max_length_tar = max_length(target_tensor)

        pad = tf.keras.preprocessing.sequence.pad_sequences
        input_tensor = pad(input_tensor, maxlen=max_length_inp, padding='post')
        target_tensor = pad(target_tensor, maxlen=max_length_tar, padding='post')

        return input_tensor, target_tensor, inp_lang, targ_lang, max_length_inp, max_length_tar


    # In[3]:


    def create_db(path_to_file):
        """Load the dataset and split it 80/20 into training and validation sets.

        Returns:
            (input_train, input_val, target_train, target_val,
             vocab_inp_size, vocab_tar_size, max_length_inp, max_length_targ)
        """
        (input_tensor, target_tensor, inp_lang, targ_lang,
         max_length_inp, max_length_targ) = load_dataset(path_to_file)

        # Fixed random_state keeps the split reproducible.
        splits = train_test_split(input_tensor, target_tensor, test_size=0.2, random_state=42)
        input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = splits

        return (
            input_tensor_train,
            input_tensor_val,
            target_tensor_train,
            target_tensor_val,
            len(inp_lang.word2idx),
            len(targ_lang.word2idx),
            max_length_inp,
            max_length_targ,
        )


    # In[4]:


    class Encoder(gl.Block):
        """Embedding followed by an LSTM declared with layout 'NTC'."""

        def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
            super().__init__()
            self.batch_sz = batch_sz
            self.enc_units = enc_units
            self.embedding = gl.nn.Embedding(
                input_dim=vocab_size,
                output_dim=embedding_dim,
                weight_initializer=init.Uniform(1.0),
            )
            self.LSTM = gl.rnn.LSTM(
                hidden_size=enc_units,
                layout='NTC',
                input_size=embedding_dim,
                i2h_weight_initializer=init.Xavier(),
                h2h_weight_initializer=init.Orthogonal(),
            )

        def forward(self, x, hidden):
            """Embed `x` and run the LSTM from `hidden`; returns (output, state)."""
            embedded = self.embedding(x)
            output, state = self.LSTM(embedded, hidden)
            return output, state

        def init_hidden(self):
            """Return a zero array of shape (1, batch_sz, enc_units)."""
            state_shape = (1, self.batch_sz, self.enc_units)
            return mx.nd.zeros(state_shape)


    class Decoder(gl.Block):
        """Embedding -> LSTM (layout 'NTC') -> per-step dense projection to the vocabulary."""

        def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
            super().__init__()
            self.batch_sz = batch_sz
            self.dec_units = dec_units
            self.embedding = gl.nn.Embedding(
                input_dim=vocab_size,
                output_dim=embedding_dim,
                weight_initializer=init.Uniform(1.0),
            )
            self.LSTM = gl.rnn.LSTM(
                hidden_size=dec_units,
                layout='NTC',
                input_size=embedding_dim,
                i2h_weight_initializer=init.Xavier(),
                h2h_weight_initializer=init.Orthogonal(),
            )
            # flatten=False keeps the leading axes and projects only the last one.
            self.fc = gl.nn.Dense(
                units=vocab_size,
                flatten=False,
                weight_initializer=init.Xavier(),
            )

        def forward(self, x, hidden, enc_output):
            """Return (vocabulary scores, LSTM state); `enc_output` is accepted but unused."""
            embedded = self.embedding(x)
            output, state = self.LSTM(embedded, hidden)
            scores = self.fc(output)
            return scores, state




    # In[5]:


    class Encap(gl.Block):
        """Bundles an encoder and a decoder and returns [loss, accuracy] for a batch."""

        def __init__(self, encoder,decoder):
            super().__init__()
            self.encoder = encoder
            self.decoder = decoder

        def forward(self, inp,targ, hidden, BATCH_SIZE,vocab):
            """Encode `inp`, decode `targ` shifted by one step, and score the result.

            `hidden` is used for both elements of the encoder's initial state.
            `BATCH_SIZE` is accepted but not used here.
            """
            enc_output, enc_hidden = self.encoder(inp, [hidden, hidden])

            # The decoder is fed every target token except the last and is
            # scored against every token except the first.
            decoder_in = targ[:, :-1]
            expected = targ[:, 1:]

            predictions, _ = self.decoder(decoder_in, enc_hidden, enc_output)

            batch_loss = loss_function(expected, predictions, vocab)
            batch_accuracy = acc_function(expected, predictions, vocab)
            return [batch_loss, batch_accuracy]


    # In[6]:


    def loss_function(real, pred,vocab):
        """Mean softmax cross-entropy over all positions of the batch.

        `pred` is flattened to (-1, vocab) and `real` to (-1,) first.
        NOTE(review): every position contributes to the mean, including any
        padding entries -- confirm that this is intended.
        """
        flat_labels = real.reshape(-1)
        flat_scores = pred.reshape(-1, vocab)
        criterion = gl.loss.SoftmaxCrossEntropyLoss(axis=-1, sparse_label=True, from_logits=False, weight=None)
        per_position = criterion(flat_scores, flat_labels)
        return per_position.mean()


    # In[7]:


    def acc_function(real, pred,vocab):
        """Return the fraction of positions whose arg-max prediction equals the label.

        Args:
            real: label ids; flattened to (-1,).
            pred: prediction scores; flattened to (-1, vocab).
            vocab: size of the last axis of `pred`.
        """
        # Use the NDArray's own argmax rather than numpy.argmax, which only
        # works on NDArrays through NumPy's method-dispatch fallback.
        pred_ids = pred.reshape(-1, vocab).argmax(axis=1)
        labels = real.reshape(-1)
        acc = mx.metric.Accuracy()
        # Accuracy.update expects (labels, preds); the arguments used to be swapped.
        acc.update(labels=labels, preds=pred_ids)
        return acc.get()[1]


    # In[8]:


    def train(data_iter,model,hidden,batch_size,vocab_tar_size,trainer):
        """Run one training epoch over `data_iter` and reset the iterator.

        Loss and accuracy of every 600th batch (starting with batch 0) are printed.
        """
        for batch, dat in enumerate(data_iter):
            inp = dat.data[0]
            targ = dat.label[0]
            with mx.autograd.record():
                loss, acc = model(inp, targ, hidden, batch_size, vocab_tar_size)
            loss.backward()
            # The loss returned by the model is already a mean, so the
            # gradient must not be divided by the batch size a second time:
            # Trainer.step rescales gradients by 1/batch_size.
            trainer.step(batch_size=1)

            if batch % 600 == 0:
                print('Batch {} Loss {}'.format(batch,loss.asnumpy()[0]))
                print('Batch {} Accuracy {}'.format(batch,acc))
                sys.stdout.flush()

        data_iter.reset()


    # In[9]:


    def test_old(val_iter,model,hidden,batch_size,vocab_tar_size):
        """Print the validation perplexity over `val_iter`, then reset the iterator."""
        val_loss = 0
        n_batches = 0
        for dat in val_iter:
            inp = dat.data[0]
            targ = dat.label[0]
            # The model returns [loss, accuracy]; only the loss is needed here.
            # (Calling .asnumpy() on the returned list raised AttributeError.)
            result = model(inp,targ,hidden,batch_size,vocab_tar_size)
            val_loss += result[0].asnumpy()[0]
            n_batches += 1
        if n_batches == 0:
            print("Validation set produced no batches")
        else:
            # Average over the number of batches (not the last batch index);
            # the cross-entropy uses natural logs, so perplexity is exp(loss).
            print("Validation Perplexity :",np.exp(val_loss/n_batches))
        val_iter.reset()

    def test(val_iter,model,hidden,batch_size,vocab_tar_size):
        """Print mean validation loss, perplexity and accuracy, then reset the iterator."""
        val_loss = 0
        t_acc = 0
        n_batches = 0
        for dat in val_iter:
            inp = dat.data[0]
            targ = dat.label[0]
            result = model(inp,targ,hidden,batch_size,vocab_tar_size)
            val_loss += (result[0]).asnumpy()[0]
            t_acc += result[1]
            n_batches += 1
        if n_batches == 0:
            print("Validation set produced no batches")
        else:
            # Divide by the batch count: the last enumerate() index is one
            # too small and is zero when there is a single batch.
            mean_loss = val_loss/n_batches
            print("Validation Loss :",mean_loss)
            # The cross-entropy uses natural logs, so perplexity is exp(loss).
            print("Validation Perplexity :",np.exp(mean_loss))
            print("Validation Acc :",t_acc/n_batches)
        sys.stdout.flush()
        val_iter.reset()


    # In[10]:


    def run_tr(n_epochs, BATCH_SIZE, embedding_dim, units, max_seconds=86400):
        """Train and evaluate the encoder/decoder translation model.

        Args:
            n_epochs: maximum number of epochs to run.
            BATCH_SIZE: mini-batch size of both iterators and of the initial hidden state.
            embedding_dim: embedding dimension of encoder and decoder.
            units: LSTM hidden size of encoder and decoder.
            max_seconds: wall-clock budget; no new epoch is started once it is
                exceeded (default 86400 seconds = 24 hours).

        BATCH_SIZE, embedding_dim and units are now honoured; they used to be
        overwritten by hard-coded values (128, 256, 256) inside the function.
        """
        path_to_zip = tf.keras.utils.get_file('spa-eng.zip', origin='http://download.tensorflow.org/data/spa-eng.zip', extract=True)
        path_to_file = os.path.join(os.path.dirname(path_to_zip), "spa-eng", "spa.txt")
        mx.random.seed(1)
        np.random.seed(1)
        (input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val,
         vocab_inp_size, vocab_tar_size, max_length_inp, max_length_targ) = create_db(path_to_file)

        data_iter = mx.io.NDArrayIter(mx.nd.array(input_tensor_train),mx.nd.array(target_tensor_train),BATCH_SIZE,True)
        val_iter = mx.io.NDArrayIter(mx.nd.array(input_tensor_val),mx.nd.array(target_tensor_val),BATCH_SIZE,True)

        encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
        hidden = encoder.init_hidden()
        decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)
        model = Encap(encoder,decoder)
        model.initialize()
        trainer = gl.Trainer(model.collect_params(), mx.optimizer.Adam(learning_rate=0.0001))

        params_file = 'MXNet_CPU_Translation_LSTM_model'

        memT, cpuT, gpuT = YN.StartLogger("MXNet_CPU","Manythings")
        start = time.time()
        time_consumed = 0.0
        epoch = 1
        try:
            while time_consumed <= max_seconds and epoch <= n_epochs:
                print("\n\nEpoch ",epoch)
                print("Time since beginning ", str(datetime.timedelta(seconds=time_consumed)) )
                train(data_iter,model,hidden,BATCH_SIZE,vocab_tar_size,trainer)
                test(val_iter,model,hidden,BATCH_SIZE,vocab_tar_size)
                time_consumed = time.time() - start
                sys.stdout.flush()
                # Checkpoint on the epoch that just finished (5, 10, ...);
                # testing after the increment would save after 4, 9, ...
                if epoch % 5 == 0:
                    model.save_parameters(params_file)
                epoch += 1
        finally:
            # Always stop the logger started above, even if training raised.
            end = time.time()
            YN.EndLogger(memT, cpuT, gpuT)
        print("\n\nTotal Time Consumed ", str(datetime.timedelta(seconds=end-start)))
        # Save the final weights, but only after at least one epoch: the
        # decoder's Dense layer has no in_units, so its parameters only get
        # their shape during the first forward pass.
        if epoch > 1:
            model.save_parameters(params_file)



    # In[11]:


    # Launch the translation benchmark with its fixed hyper-parameters.
    run_tr(
        n_epochs,
        BATCH_SIZE=128,
        embedding_dim=256,
        units=256,
    )


    # In[ ]:





# In[ ]:


In [33]:
def run_ptb(n_epochs):

    class PtbIterator():
        """Iterator over (input, target) batches cut from load_ptb(train).

        Each row is a window of `seq_len` items; the target row is the same
        window shifted by one position. Successive windows start `skip_step`
        items apart.
        """

        def __init__(self, train, batch_size, seq_len, skip_step=5):
            self.data = load_ptb(train)
            self.batch_size = batch_size
            self.seq_len = seq_len
            self.skip_step = skip_step
            self.reset()

        def __iter__(self):
            # Every new iteration starts again from the beginning of the data.
            self.reset()
            return self

        def __next__(self):
            batch_shape = (self.batch_size, self.seq_len)
            x = mx.nd.empty(batch_shape, dtype=np.int32)
            y = mx.nd.empty(batch_shape, dtype=np.int32)

            for row in range(self.batch_size):
                begin = self.cur_idx
                stop = begin + self.seq_len
                # The shifted target window needs one item past `stop`; when
                # it is not available the (partial) batch is dropped.
                if stop >= len(self.data):
                    raise StopIteration
                x[row, :] = self.data[begin:stop]
                y[row, :] = self.data[begin + 1:stop + 1]
                self.cur_idx = begin + self.skip_step
            return x, y

        def reset(self):
            """Move the read position back to the start of the data."""
            self.cur_idx = 0


    # In[8]:


    class PtbLstm(gl.Block):
        """Language model: embedding -> multi-layer LSTM -> dropout -> vocabulary projection."""

        def __init__(self, vocabulary_size, hidden_size, num_layers, dropout):
            super().__init__()
            with self.name_scope():
                self.embed = gl.nn.Embedding(input_dim=vocabulary_size, output_dim=hidden_size,
                                             weight_initializer=init.Uniform(1.0))
                # PtbIterator yields batches shaped (batch_size, seq_len), so
                # the embedded input is batch-major. The layout must be 'NTC';
                # with 'TNC' the LSTM treats the batch axis as time and
                # recurs across samples instead of along each sequence.
                self.lstm = gl.rnn.LSTM(input_size=hidden_size, hidden_size=hidden_size,
                                        num_layers=num_layers, layout='NTC',
                                        i2h_weight_initializer=init.Xavier(),
                                        h2h_weight_initializer=init.Orthogonal())
                self.dropout = gl.nn.Dropout(rate=dropout)
                # flatten=False applies the projection to the last axis only.
                self.fc = gl.nn.Dense(in_units=hidden_size, units=vocabulary_size, flatten=False,
                                      weight_initializer=init.Xavier())

        def forward(self, x):
            """Return vocabulary scores for every position of the batch `x`."""
            x = self.embed(x)
            o = self.lstm(x)
            o = self.dropout(o)
            f = self.fc(o)
            return f


    # In[9]:


    def ptb_train(model, data_iter, criterion, trainer, epoch, print_every=1000):
        """Run one training pass over `data_iter`.

        Mean loss and mean per-batch accuracy since the last report are
        printed every `print_every` batches.
        """
        losses = []
        t_acc = 0
        total = 0
        for i, (inputs, labels) in enumerate(data_iter):
            # Only the forward pass and the loss need to be recorded.
            with mx.autograd.record():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            loss.backward()
            trainer.step(batch_size=labels.shape[0])

            losses.append(loss.mean().asscalar())

            # Metric bookkeeping stays outside the recorded scope. It uses the
            # NDArray's own argmax and the (labels, preds) order that
            # Accuracy.update expects.
            acc = mx.metric.Accuracy()
            acc.update(labels=labels, preds=outputs.argmax(axis=-1))
            t_acc += (acc.get())[1]
            total += 1

            if (i + 1) % print_every == 0:
                print('[%d, %5d] train loss: %.3f' % (epoch, i + 1, np.mean(losses)))
                print('[%d, %5d] train acc: %.3f' % (epoch, i + 1, t_acc/total))
                losses = []
                t_acc = 0
                total = 0
                sys.stdout.flush()



    def ptb_test(model, data_iter, criterion, epoch):
        """Evaluate the model on `data_iter` and print loss, perplexity and accuracy."""
        losses = []
        t_acc = 0
        total = 0
        for inputs, labels in data_iter:
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            losses.append(mx.nd.mean(loss).asscalar())

            # Use the NDArray's own argmax and the (labels, preds) order that
            # Accuracy.update expects.
            acc = mx.metric.Accuracy()
            acc.update(labels=labels, preds=outputs.argmax(axis=-1))
            t_acc += (acc.get())[1]
            total += 1

        if total == 0:
            # Nothing to average: avoids a ZeroDivisionError and a NaN loss.
            print('[%d] test set produced no batches' % epoch)
            sys.stdout.flush()
            return

        loss = np.mean(losses)
        perplexity = np.exp(loss)
        print('[%d] test loss: %.3f perplexity: %.3f' % (epoch, loss, perplexity))
        print('[%d] test acc: %.3f ' % (epoch, t_acc/total))
        sys.stdout.flush()




    def ptb_run(n_epochs, hidden_size, batch_size, seq_len, dropout, num_layers,
                max_seconds=86400):
        """Train and evaluate the PTB language model, checkpointing along the way.

        Args:
            n_epochs: maximum number of epochs to run.
            hidden_size: embedding and LSTM hidden size.
            batch_size: number of windows per batch.
            seq_len: length of every window.
            dropout: dropout rate applied to the LSTM output.
            num_layers: number of stacked LSTM layers.
            max_seconds: wall-clock budget; no new epoch is started once it is
                exceeded (default 86400 seconds = 24 hours).
        """
        mx.random.seed(1)
        np.random.seed(1)
        random.seed(1)

        ptb_vocab = load_ptb_vocab()
        vocabulary_size = len(ptb_vocab)

        train_iter = PtbIterator(train=True, batch_size=batch_size, seq_len=seq_len)
        test_iter = PtbIterator(train=False, batch_size=batch_size, seq_len=seq_len)

        model = PtbLstm(vocabulary_size, hidden_size, num_layers, dropout)
        model.initialize()
        criterion = gl.loss.SoftmaxCrossEntropyLoss()
        trainer = gl.Trainer(model.collect_params(), mx.optimizer.AdaDelta(rho=0.95, epsilon=1e-06))

        params_file = 'MXNet_CPU_PTB_LSTM_model'

        memT, cpuT, gpuT = YN.StartLogger("MXNet_CPU", "PTB")
        start = time.time()
        time_consumed = 0.0
        epoch = 1

        try:
            while time_consumed <= max_seconds and epoch <= n_epochs:
                ptb_train(model, train_iter, criterion, trainer, epoch)
                ptb_test(model, test_iter, criterion, epoch)
                time_consumed = time.time() - start
                print("Time since beginning: ", str(datetime.timedelta(seconds=time_consumed)) )
                sys.stdout.flush()
                # Checkpoint on the epoch that just finished (10, 20, ...);
                # testing after the increment would save after 9, 19, ...
                if epoch % 10 == 0:
                    model.save_parameters(params_file)
                epoch += 1
        finally:
            # Always stop the logger started above, even if training raised.
            end = time.time()
            YN.EndLogger(memT, cpuT, gpuT)

        print("\n\nTotal Time Consumed ", str(datetime.timedelta(seconds=end-start)))
        model.save_parameters(params_file)




    # Launch the PTB benchmark with its fixed hyper-parameters.
    ptb_run(
        n_epochs,
        hidden_size=200,
        batch_size=20,
        seq_len=30,
        dropout=0.5,
        num_layers=2,
    )








In [None]:
# Driver cell: choose the benchmark to execute. Only the PTB run is enabled;
# the other two entry points are left commented out.
#run_imdb(n_epochs=50)
#run_manythings(n_epochs=100)
run_ptb(n_epochs=50)