<h3>Import Necessary Libraries</h3>

In [82]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM,Embedding
from keras.layers import Dense
import tensorflow as tf
from keras.optimizers import Adam
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import os


<h3>Read word</h3>

In [83]:
def read_words(filename):
  with tf.io.gfile.GFile(filename,'r') as f:
    return f.read().replace("\n","<eos>").split()

<h3>File to words_id</h3>

In [84]:
def file_to_words(filename,words_id):
  data=read_words(filename)
  return [words_id[word] for word in data if word in words_id]

<h3>Build Vocabulary</h3>

In [85]:
def build_vocabulary(filename):
  data = read_words(filename)
  counter=collections.Counter(data)
  count_pairs=sorted(counter.items(),key=lambda x:(-x[1],x[0]))
  words,_=list(zip(*count_pairs))
  words_to_id=dict(zip(words,range(len(words))))
  return words_to_id

<h3>Get Ptb Raw data</h3>

In [86]:
def ptb_raw(data_path=None):
  train_path=os.path.join(data_path,'ptb.train.txt')
  test_path=os.path.join(data_path,'ptb.test.txt')
  valid_path=os.path.join(data_path,'ptb.valid.txt')

  word_to_id=build_vocabulary(train_path)
  train_data=file_to_words(train_path,word_to_id)
  test_data=file_to_words(test_path,word_to_id)
  valid_data=file_to_words(valid_path,word_to_id)
  vocabulary=len(word_to_id)

  return word_to_id,train_data,test_data,valid_data,vocabulary

<h3>Iterate on raw ptb data</h3>

In [87]:
def ptb_iterator(raw_data,batch_size,num_steps):
  raw_data = np.array(raw_data, dtype=np.int32)
  data_len=len(raw_data)
  batch_len=data_len//batch_size
  data=np.zeros([batch_size,batch_len],dtype=np.int32)
  for i in range(batch_size):
    data[i]=raw_data[batch_len*i:batch_len*(i+1)]
  epoch_size=(batch_len-1)//num_steps
  if epoch_size == 0:
    raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
  for i in range(epoch_size):
      x = data[:, i*num_steps:(i+1)*num_steps]
      y = data[:, i*num_steps+1:(i+1)*num_steps+1]
      yield (x, y)



<h3>Initialize important variables</h3>

In [88]:
init_weight_scale=0.1
learning_rate=0.01
max_grad_norm=5
num_layer=2
num_steps=25
hidden_size1=256
hidden_size2=128
max_epoch_decay_lr = 4
max_epoch = 25
keep_prob = 1
decay = 0.02
batch_size = 30
vocab_size = 10000
embeding_vector_size= 200
is_training = 1
data_dir = "F:\Machine learning\simple-examples\data"

<h3>Build Model</h3>

In [89]:
class MODEL(object):
  def __init__(self):
    self.batch_size = batch_size
    self.num_steps = num_steps
    self.hidden_size_l1 = hidden_size1
    self.hidden_size_l2 = hidden_size2
    self.vocab_size = vocab_size
    self.embeding_vector_size = embeding_vector_size
    self._lr = 1

    self.model=Sequential()
    self.embedding_layer = tf.keras.layers.Embedding(self.vocab_size, self.embeding_vector_size,batch_input_shape=(self.batch_size, self.num_steps),trainable=True,name="embedding_vocab")  #[10000x200]
    self.model.add(self.embedding_layer)
    lstm_cell_l1 = tf.keras.layers.LSTMCell(hidden_size1)
    lstm_cell_l2 = tf.keras.layers.LSTMCell(hidden_size2)
    stacked_lstm = tf.keras.layers.StackedRNNCells([lstm_cell_l1, lstm_cell_l2])
    self.RNNlayer  =  tf.keras.layers.RNN(stacked_lstm,[batch_size, num_steps],return_state=False,stateful=True,trainable=True)
    self._initial_state = tf.Variable(tf.zeros([batch_size,embeding_vector_size]),trainable=False)
    self.RNNlayer.inital_state = self._initial_state
    self.model.add(self.RNNlayer)
    self.dense = tf.keras.layers.Dense(self.vocab_size)
    self.model.add(self.dense)
    self.activation = tf.keras.layers.Activation('softmax')
    self.model.add(self.activation)
    self.optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, clipvalue=max_grad_norm)
    self.model.compile(loss=self.crossentropy, optimizer=self.optimizer)
    self.model.summary()

  def crossentropy(self,y_true, y_pred):
    return tf.keras.losses.sparse_categorical_crossentropy(y_true,y_pred)
    
  def train_batch(self,input_data,targets):
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = self.model.trainable_variables
    with tf.GradientTape() as tape:
      output_words_prob = self.model(input_data)
      loss  = self.crossentropy(targets, output_words_prob)
      cost = tf.reduce_sum(loss/ batch_size)
      grad_t_list = tape.gradient(cost, tvars)
      grads, _ = tf.clip_by_global_norm(grad_t_list, max_grad_norm)
      train_op = self.optimizer.apply_gradients(zip(grads, tvars))
      return cost
  def test_batch(self,_input_data,_targets):
        output_words_prob = self.model(_input_data)
        loss  = self.crossentropy(_targets, output_words_prob)
        cost = tf.reduce_sum(loss/ self.batch_size)
        return cost
  @classmethod
  def instance(cls) : 
    return MODEL()




In [90]:
import time


def run_one_epoch(m, data,is_training=True,verbose=False):

    
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.
    iters = 0
    
    m.model.reset_states()
    
    for step, (x, y) in enumerate(ptb_iterator(data, m.batch_size, m.num_steps)):
        
        if is_training : 
            loss=  m.train_batch(x, y)
        else :
            loss = m.test_batch(x, y)
                                   
        costs += loss
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("Itr %d of %d, perplexity: %.3f speed: %.0f wps" % (step , epoch_size, np.exp(costs / iters), iters * m.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)

<h3>Read the data and separates it into training data, validation data and testing data</h3>

In [91]:
raw_data = ptb_raw(data_dir)
print("Type of raw_data:", type(raw_data))  # Debugging print statement
train_data, valid_data, test_data, _, _ = raw_data
print("Type of train_data before conversion:", type(train_data)) 
train_data = [int(value) for value in train_data.values()] # Debugging print statement
train_data = np.array(train_data, dtype=np.int32)
print("Type of train_data after conversion:", type(train_data))  # Debugging print statement
valid_data = np.array(valid_data, dtype=np.int32)
test_data = np.array(test_data, dtype=np.int32)

Type of raw_data: <class 'tuple'>
Type of train_data before conversion: <class 'dict'>
Type of train_data after conversion: <class 'numpy.ndarray'>


<h3>Instantiates the PTBModel class</h3>

In [92]:
m=MODEL.instance()   
K = tf.keras.backend 
for i in range(max_epoch):
    lr_decay = decay ** max(i - max_epoch_decay_lr, 0.0)
    dcr = learning_rate * lr_decay
    m.lr = dcr
    K.set_value(m.model.optimizer.learning_rate,m.lr)
    print("Epoch %d : Learning rate: %.3f" % (i + 1, m.model.optimizer.learning_rate))

    train_perplexity = run_one_epoch(m, train_data,is_training=True,verbose=True)
    print("Epoch %d : Train Perplexity: %.3f" % (i + 1, train_perplexity))
        
    # Run the loop for this epoch in the validation mode
    valid_perplexity = run_one_epoch(m, valid_data,is_training=False,verbose=False)
    print("Epoch %d : Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
    
# Run the loop in the testing mode to see how effective was our training
test_perplexity = run_one_epoch(m, test_data,is_training=False,verbose=False)
print("Test Perplexity: %.3f" % test_perplexity)

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_vocab (Embedding  (30, 25, 200)             2000000   
 )                                                               
                                                                 
 rnn_7 (RNN)                 (30, 25, 128)             671088    
                                                                 
 dense_7 (Dense)             (30, 25, 10000)           1290000   
                                                                 
 activation_7 (Activation)   (30, 25, 10000)           0         
                                                                 
Total params: 3961088 (15.11 MB)
Trainable params: 3955088 (15.09 MB)
Non-trainable params: 6000 (23.44 KB)
_________________________________________________________________
Epoch 1 : Learning rate: 1.000
Epoch 1 : Train Perplexity: 10153.188
Epoch 1 :

KeyboardInterrupt: 