In [None]:
''' 
    /*----------------------------- AUTHOR_DETAILS -------------
    |
    |   __Project Title__   = Developing a Gender Prediction System using LSTM-based Deep Neural Networks 
    |   
    |   __author__          = Ms. Hira Arshad
    |
    |   __copyright__       = Copyright (C) 2020 Ms. Hira Arshad
    |
    |   __license__         = Public Domain
    |
    |   __version__         = 1.0
    *------------------------------------------------------------
'''
print()




**Note:** Before running this notebook, set the **"drive_path"** variable to match your own folder location.

In [None]:
''' 
    /*----------------------------- PROJECT_PURPOSE -------------
    | - The main purpose of this program is to demonstrate how LSTM-based Deep Neural Network can be used for the development 
    |   and evaluation of Gender Prediction from Text (i.e. a Binary Classification Problem). For this purpose, Insha Allah, I will execute the Machine Learning Cycle
    *------------------------------------------------------------
'''
print()




# **Machine Learning Cycle**
## **Four phases of a Machine Learning Cycle are**
### **Training Phase**
  * **Build the Model using Training Data**

### **Testing Phase**
  * **Evaluate the performance of Model using Testing Data**

### **Application Phase**
  * **Deploy the Model in the Real World, to make predictions on Real-time unseen Data**
  
### **Feedback Phase**
  * **Take Feedback from the Users and Domain Experts to improve the Model**

# **Steps – Executing Machine Learning Cycle Using Separate Files**
* **Step 1: Import Libraries**
* **Step 2: Load Training Data, Testing Data and Validation Data**
* **Step 3: Understand and Pre-process Training Data, Testing Data and  Validation Data**
* **Step 4: Represent Training Data, Testing Data and Validation Data in Machine Understandable Format**
* **Step 5: Execute the Training Phase**
* **Step 6: Execute the Testing Phase**
* **Step 7: Execute the Application Phase**
* **Step 8: Execute the Feedback Phase**
* **Step 9: Improve Model Based on Feedback**

# **Step 1: Import Libraries**



In [None]:
''' 
    /*----------------------------- IMPORT_LIBRARIES -------------
'''
import os
import re
import time
import spacy
import numpy as np 
import pandas as pd

import torch
import torch.nn as nn
from torchtext import data
import torch.optim as optim
from torchtext import vocab
import torch.nn.functional as F
from torchtext.data import Field
from torch.autograd import Variable
from torchtext.vocab import Vectors
from torchtext.data import TabularDataset
device = torch.device("cuda:0")

Mount Google Drive

In [None]:
''' 
    /*----------------------------- MOUNT_GOOGLE_DRIVE -------------
      - To connect your colab notebook with google drive
'''    
# Colab-specific module: this cell assumes the notebook is running inside Google Colab.
from google.colab import drive
# Attach Google Drive at /content/drive so the data and model files referenced later can be reached.
drive.mount('/content/drive')

Mounted at /content/drive


# **Step 2: Load Training Data, Testing Data and Validation Data**


In [None]:
''' 
    /*----------------------------- LOAD_DATASET -------------
    | Function  : load_dataset()
    | Purpose   : Reads dataset(s) in CSV file format 
    | Arguments : 
    |       drive_path : Path to dataset file
    |       dataset    : Dataset file name
    | Return    :
    |       dataset    : Dataset in dataframe format
    *---------------------------------------------------------*/
'''
def load_dataset(drive_path, dataset):
  """Read a CSV dataset from disk, print it, and return it as a DataFrame.

  Arguments:
      drive_path : Base directory; it is concatenated as-is with ``dataset``
                   (no path separator is inserted).
      dataset    : File location relative to ``drive_path``,
                   e.g. "/Data/train_data.csv".
  Return:
      The contents of the CSV file as a pandas DataFrame.
  """
  loaded_dataset = pd.read_csv(drive_path + dataset)     # Read CSV file
  print("="*40, "\n", loaded_dataset)                    # Show the loaded data for inspection
  # Return the DataFrame itself; returning ``dataset`` would only hand back
  # the file-name string that the caller passed in.
  return loaded_dataset

# **Step 3: Understand and Pre-process Training Data, Testing Data and Validation Data**


### **Step 3.1: Pre-process Text**
* Remove Non-alphanumeric Characters
* Lower Case
* Remove Leading and Trailing Whitespaces


In [None]:
''' 
    /*----------------------------- DATA_PRE-PROCESSING -------------
    | Function  : data_pre-processing()
    | Purpose   : Performs following pre-processing:
    |              •	Remove non-alphanumeric characters
    |              •	Lower case
    |              •	Remove leading and trailing whitespaces
    | Arguments : 
    |       text: Text to be pre-processed
    | Return    :
    |       text: Pre-processed text
    *------------------------------------------------------------------------------------------------*/
'''
def data_pre_processing(text):
      """Normalize raw text before tokenization.

      Every run of characters outside A-Z, a-z and 0-9 is collapsed into a
      single space, the result is lower-cased, and surrounding whitespace
      is stripped.

      Arguments:
          text : Text to be pre-processed
      Return:
          The pre-processed text
      """
      alphanumeric_only = re.sub(r'[^A-Za-z0-9]+', ' ', text)
      return alphanumeric_only.lower().strip()

### **Step 3.2: Tokenize Text** 

In [None]:
''' 
    /*----------------------------- TOKENIZE_TEXT -------------
    | Function  : data_tokenization()
    | Purpose   : Tokenizes a Text
    | Arguments : 
    |       text: Text to be tokenized
    | Return    :
    |       text: Tokenized Text
    *------------------------------------------------------------------------------------------------*/
'''
_SPACY_TOKENIZER = None   # Lazily loaded spaCy pipeline, shared by every data_tokenization() call


def data_tokenization(s):
      """Pre-process a text and split it into lower-cased tokens.

      The spaCy model is loaded on the first call only and then reused.
      This function is invoked once per example, so reloading the model on
      every call would repeat the same expensive load for each row.

      Arguments:
          s : Text to be tokenized
      Return:
          List of token strings
      """
      global _SPACY_TOKENIZER
      if _SPACY_TOKENIZER is None:
          _SPACY_TOKENIZER = spacy.load('en')                            # Load english tokenizer from spacy (first call only)
      # Apply the pre-processing function first, then tokenize the cleaned text
      return [w.text.lower() for w in _SPACY_TOKENIZER(data_pre_processing(s))]

### **Step 3.3: Build Training Data, Testing Data and Validation Data Objects**

In [None]:
''' 
    /*----------------------------- BUILD_DATA_OBJECTS -------------
    | Function  : data_objects()
    | Purpose   : Build pre-processed and tokenized data objects
    | Arguments : 
    |       drive_path : Path of the directory where data files are placed
    | Return    :
    |       pre_processed_training_data, pre_processed_validation_data, pre_processed_testing_data, LABEL, TEXT
    *------------------------------------------------------------------------------------------------*/
'''
def data_objects(drive_path):
  """Build the pre-processed, tokenized dataset objects and their Field definitions.

  Reads train_data.csv, validation_data.csv and test_data.csv from
  ``drive_path + '/Data/'`` (header row skipped), mapping the first column to
  the ``Text`` field and the second to the ``Gender`` field, then prints the
  tokenized text of every example in each split.

  Arguments:
      drive_path : Path of the directory that contains the ``Data`` folder
  Return:
      training_data, validation_data, testing_data, LABEL, TEXT
  """
  # Field objects describe how each column is pre-processed
  text_field = Field(
      sequential=True,
      tokenize=data_tokenization,
      lower=True,
      include_lengths=False,
      batch_first=False,
      init_token='<sos>',
      eos_token='<eos>',
  )
  label_field = data.LabelField(dtype=torch.float)
  column_fields = [('Text', text_field), ('Gender', label_field)]

  # One TabularDataset per CSV file, all sharing the same column/Field mapping
  training_data, validation_data, testing_data = TabularDataset.splits(
      path=drive_path + '/Data/',
      train='train_data.csv',
      validation='validation_data.csv',
      test='test_data.csv',
      format='csv',
      fields=column_fields,
      skip_header=True,
  )

  # Show the tokenized text of every example, split by split
  report_sections = (
      ("\nPre-processed and Tokenized Training Data:", training_data),
      ("\nPre-processed and Tokenized Validation Data:", validation_data),
      ("\nPre-processed and Tokenized Testing Data:", testing_data),
  )
  for heading, split in report_sections:
    print(heading)
    print("\n=========================================")
    for example in split:
      print(example.Text)

  return training_data, validation_data, testing_data, label_field, text_field

### **Step 3.4: Load Pre-Trained Word Embedding Vectors**

#### Note
**To download the Pre-trained Glove Model**
* **In *Lecture 05 - Developing a Gender Identification (from Text) System using RNN-based Deep Neural Network* See *glove.6B.100d* File in *Pre-trained Glove Model* Folder**

#### Note
**For Lecture 05 - LSTM (Data and Code)**
* **To use the *glove.6B.100d* File (Pre-trained Glove Model) in your Code, Copy the *glove.6B.100d* File in *Lecture 05 - LSTM (Data and Code)* Folder**

In [None]:
''' 
    /*----------------------------- LOAD_WORD_EMBEDDING_VECTORS -------------
    | Function  : load_word_embedding_vectors()
    | Purpose   : Load pre-trained word embedding vectors from memory
    | Arguments : 
    |       drive_path : Path to word embedding vectors file
    | Return    :
    |       vectors     : Loaded word embedding vectors
    *------------------------------------------------------------------------------------------------*/
'''
def load_word_embedding_vectors(drive_path):
  """Load the pre-trained GloVe 100d word embedding vectors from disk.

  The file ``glove.6B.100d.txt`` must already be downloaded; ``drive_path``
  is handed to ``vocab.Vectors`` as the directory in which to look for it.

  Arguments:
      drive_path : Directory that holds the word embedding vectors file
  Return:
      The loaded word embedding vectors object
  """
  return vocab.Vectors('glove.6B.100d.txt', cache=drive_path)

### **Step 3.5: Build Vocabulary**

In [None]:
''' 
    /*----------------------------- BUILD_VOCABULARY -------------
    | Function  : build_vocabulary()
    | Purpose   : Build vocabulary from input data
    | Arguments : 
    |       pre_processed_training_data   : Pre-processed training data
    |       pre_processed_validation_data : Pre-processed validation data
    |       pre_processed_testing_data    : Pre-processed testing data
    |       vectors                       : Word embedding vectors 
    |       LABEL                         : LABEL object (Pre-processing applied on output)
    |       TEXT                          : TEXT object (Pre-processing applied on input)
    | Return    :
    |       word_embeddings               : Word embedding vectors mapped on data
    |       vocabulary_size               : Size of vocabulary
    *------------------------------------------------------------------------------------------------*/
'''

def build_vocabulary(training_data, validation_data , testing_data, vectors, LABEL, TEXT):
  """Build the input and label vocabularies and attach the pre-trained vectors.

  Both vocabularies are built from all three splits and are printed as
  word-to-index dictionaries.

  NOTE: with real-world datasets the best practice is to build the
  vocabulary on the training data only.

  Arguments:
      training_data   : Pre-processed training data
      validation_data : Pre-processed validation data
      testing_data    : Pre-processed testing data
      vectors         : Word embedding vectors
      LABEL           : LABEL field object (pre-processing applied on output)
      TEXT            : TEXT field object (pre-processing applied on input)
  Return:
      word_embeddings : Word embedding vectors mapped onto the vocabulary
      vocabulary_size : Number of entries in the input vocabulary
  """
  all_splits = (training_data, validation_data, testing_data)
  divider = "\n========================================="

  # Input vocabulary: tokens without a pre-trained vector are initialised via unk_init
  TEXT.build_vocab(*all_splits, vectors=vectors, unk_init=torch.Tensor.normal_)
  # Output vocabulary: encodes every label
  LABEL.build_vocab(*all_splits)

  print(divider)
  print("Output/Label word to index dictionary: ", LABEL.vocab.stoi)
  print(divider)
  print("Input Text word to index dictionary:\n ", TEXT.vocab.stoi,"\n")

  return TEXT.vocab.vectors, len(TEXT.vocab)

# **Step 4: Represent Training Data, Testing Data and Validation Data in Machine Understandable Format**

In [None]:
''' 
    /*----------------------------- REPRESENT_DATA_IN_MACHINE_UNDERSTANDABLE_FORMAT -------------
    | Function  : data_iterators()
    | Purpose   : To build input data (Training, validation and testing data) iterators 
    |             (It will convert data into machine understandable format and make data objects which we can iterate over during model training and testing)
    | Arguments : 
    |       pre_processed_training_data   : Pre-processed training data
    |       pre_processed_validation_data : Pre-processed validation data
    |       pre_processed_testing_data    : Pre-processed testing data
    | Return    :
    |       training_iterator   : Training data iterator object
    |       validation_iterator : Validation data iterator object
    |       testing_iterator    : Testing data iterator object
    *------------------------------------------------------------------------------------------------*/
'''

def data_iterators(training_data, validation_data, testing_data):
  """Wrap the three datasets in BucketIterator objects.

  The iterators take care of numericalizing and batching, so that the data
  can be fed straight to the neural network. Using ``splits`` applies the
  same settings (batch size 1, sorted by text length, shuffled, no repeat)
  to every dataset.

  Arguments:
      training_data   : Pre-processed training data
      validation_data : Pre-processed validation data
      testing_data    : Pre-processed testing data
  Return:
      training_iterator, validation_iterator, testing_iterator
  """
  datasets = (training_data, validation_data, testing_data)
  iterators = data.BucketIterator.splits(
      datasets,
      batch_size=1,
      sort_key=lambda example: len(example.Text),
      repeat=False,
      shuffle=True,
  )
  training_iterator, validation_iterator, testing_iterator = iterators
  return training_iterator, validation_iterator, testing_iterator

In [None]:
# Driver cell for Steps 2-4: loads the raw CSV files for inspection, then builds
# the dataset objects, the vocabulary and the iterators used by the later cells.
print("+============================Data Preparation============================+\n\n")
# Base folder on the mounted drive; the data files are read from its "Data" sub-folder.
# It is also read as a global by validation() and test() further down.
drive_path = '/content/drive/My Drive'
print("---Step 2: Load Training Data, Testing Data and Validation Data---")
print("\nTraining data before pre_processing")
# load_dataset() prints each file; the original_* names are not referenced again in the visible code
original_training_data = load_dataset(drive_path, "/Data/train_data.csv")

print("\n\nValidation data before pre_processing")
original_validation_data = load_dataset(drive_path, "/Data/validation_data.csv")

print("\n\nTesting data before pre_processing")
original_testing_data = load_dataset(drive_path, "/Data/test_data.csv")
print("\n---Step 3: Understand and Pre-process Training Data, Testing Data and Validation Data---")
print("\n---Step 4: Represent Training Data, Testing Data and Validation Data in Machine Understandable Format---")
# Build the tokenized dataset objects plus the LABEL and TEXT Field objects
preprocessed_training_data, preprocessed_validation_data, preprocessed_testing_data, LABEL, TEXT = data_objects(drive_path)
# Load word embedding vectors from memory
vectors = load_word_embedding_vectors(drive_path)
# Build vocabulary
word_embeddings, vocabulary_size = build_vocabulary(preprocessed_training_data, preprocessed_validation_data, preprocessed_testing_data, vectors, LABEL, TEXT)
# Create iterator objects
training_iterator, validation_iterator, testing_iterator = data_iterators(preprocessed_training_data, preprocessed_validation_data, preprocessed_testing_data)



---Step 2: Load Training Data, Testing Data and Validation Data---

Training data before pre_processing
                                                  Text  Gender
0   Your task is not to seek for love, but merely ...    Male
1   You have to keep breaking your heart until it ...  Female
2   Stop acting so small. You are the universe in ...    Male
3   I’ve learned that people will forget what you ...  Female
4                       What you seek is seeking you.    Male
5   You may not control all the events that happen...  Female
6   Don’t grieve. Anything you lose comes round in...    Male
7   We delight in the beauty of the butterfly, but...  Female
8   Yesterday I was clever, so I wanted to change ...    Male
9   If you don’t like something, change it. If you...  Female
10  You were born with wings, why prefer to crawl ...    Male
11           We need much less than we think we need.  Female
12  Don’t be satisfied with stories, how things ha...    Male
13  If I am not good to m

# **Step 5: Execute the Training Phase**

### **Step 5.1:Model Architecture**

In [None]:
''' 
    /*----------------------------- MODEL_ARCHITECTURE -------------
    | Class     : LSTM()
    | Purpose   : To build the architecture of model to be trained
    *---------------------------------------------------------
    | nn.Module : Base class for all neural network modules. Your models should also subclass this class.
    |
    | Arguments:
    |      output_dim    : 1 (female or male). For output layer number of nodes in output layer will be same as 
    |                      number of outputs required in your problem
    |	     hidden_dim    : Size of the hidden layer. Here size of hidden_state of the lstm
    | 		 input_dim     : Size of the vocabulary containing unique words. Total number of unique words in sample data 
    |		   embedding_dim : Size of each embedding vector. Here embeddding dimension of GloVe word embedding 
    |                      vectors is 100 so embedding_dim = 100
    |		   weights       : Pre-trained GloVe word_embeddings which we will use to create our word_embedding look-up table
    *------------------------------------------------------------------------------------
    | Function  : forward()
    | Purpose   : This function will automatically start foward propogation when model object is called
    | Arguments :
    |     text  : Input text of shape = (num_sequences, batch_size)	
	  | Return:
	  |     hidden_state : Final model state learned from input text
    ------------------------------------------------------------------------------
'''

class LSTM(nn.Module):
    """Single-layer LSTM binary classifier on top of pre-trained word embeddings.

    Arguments:
        input_dim       : Size of the vocabulary (number of rows of the embedding table)
        embedding_dim   : Size of each embedding vector (100 for GloVe 100d)
        hidden_dim      : Size of the LSTM hidden state
        output_dim      : Number of output nodes (1 for female/male)
        word_embeddings : Pre-trained embedding matrix of shape (input_dim, embedding_dim)

    NOTE: forward() returns probabilities (sigmoid already applied), so it must
    be paired with a probability-based loss such as nn.BCELoss, not with a
    loss that applies sigmoid itself (nn.BCEWithLogitsLoss).
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, word_embeddings):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.embedding_layer = nn.Embedding(input_dim, embedding_dim)          # Embedding layer shape
        # Start from the pre-trained weights and fine-tune them during training
        # (use requires_grad = False to freeze them instead).
        # The tensor is copied so that training does not overwrite the caller's
        # embedding matrix (e.g. TEXT.vocab.vectors) in place.
        self.embedding_layer.weight = nn.Parameter(word_embeddings.clone().detach(), requires_grad = True)
        self.lstm_layer       = nn.LSTM(embedding_dim, hidden_dim, num_layers = 1) # Multiple LSTM layers can be stacked by changing num_layers
        self.linear_layer     = nn.Linear(hidden_dim, output_dim)               # Shape of linear layer

    def forward(self, text):
        """Run forward propagation; called automatically when the model object is called.

        Arguments:
            text : Tensor of token indices of shape (sequence_length, batch_size)
        Return:
            Tensor of shape (batch_size, output_dim) holding sigmoid probabilities
        """
        # Size the initial states from the actual batch instead of assuming batch_size == 1
        h_0, c_0 = self.init_hidden(text.size(1))

        # Map every index in the input sequence to its word vector:
        # embedded input of shape (sequence_length, batch_size, embedding_dim)
        embedded_vectors = self.embedding_layer(text)
        output_state, (hidden_state, cell_state) = self.lstm_layer(embedded_vectors, (h_0, c_0))
        # hidden_state is (num_layers = 1, batch_size, hidden_dim); drop the layer axis
        scores = self.linear_layer(hidden_state.squeeze(0))
        return torch.sigmoid(scores)

    def init_hidden(self, batch_size=1):
        """Create all-zero initial hidden and cell states.

        Arguments:
            batch_size : Number of sequences in the batch (defaults to 1)
        Return:
            h_0, c_0 : Tensors of shape (1, batch_size, hidden_dim), created on
                       the same device and with the same dtype as the model weights
        """
        reference = self.linear_layer.weight
        h_0 = reference.new_zeros(1, batch_size, self.hidden_dim)
        c_0 = reference.new_zeros(1, batch_size, self.hidden_dim)
        return h_0, c_0

### **Step 5.2: Hyperparameters Settings**

In [None]:
'''
/*---------------- INITIALIZE_PARAMETERS ------------------
'''
# Hyperparameters passed to the LSTM constructor and the training loop.
input_dimension     = len(TEXT.vocab)   # Vocabulary size = number of rows in the embedding table
embedding_dimension = 100               # Must match the GloVe vectors in use (glove.6B.100d)
hidden_dimension    = 10                # Size of the LSTM hidden state
output_dimension    = 1                 # Single output node for the binary (female/male) decision
number_of_epochs    = 10                # Number of passes over the training iterator

### **Step 5.3: Create Model Object**

In [None]:
"""
/* ----------------------- MODEL_OBJECT -----------------
| Create the object of model class and pass parameters required: LSTM()
|           Arguments : 
|               input_dimension     : (integer) dimension of input layer(vocabulary size)
|               output_dimension    : (integer) number of output layer nodes 
|               hidden_dimension    : (integer) number of nodes/units in hidden layer
|               embedding_dimension : (integer) dimension of embedded vector
*-------------------------------------------------------*/
"""
# Module-level model instance; save_model(), load_model() and model_predictions() read this global directly.
model = LSTM(input_dimension, embedding_dimension, hidden_dimension, output_dimension, word_embeddings)
# Bare expression so the notebook displays the layer summary
model

LSTM(
  (embedding_layer): Embedding(356, 100)
  (lstm_layer): LSTM(100, 10)
  (linear_layer): Linear(in_features=10, out_features=1, bias=True)
)

### **Step 5.4: Initialize Optimizer and Loss Function**

In [None]:
optimizer = optim.SGD(model.parameters(), lr = 1e-3)   # Initialize the optimizer
# LSTM.forward() already applies torch.sigmoid, so the model outputs probabilities.
# BCELoss expects probabilities; BCEWithLogitsLoss would apply sigmoid a second
# time, which squashes the predictions and keeps the loss from decreasing.
criterion = nn.BCELoss()                               # Initialize loss function

### **Step 5.5: Evaluation Measure**

In [None]:
''' 
    /*----------------------------- CALCULATE_ACCURACY -------------
    | Function  : calculate_accuracy()
    | Purpose   : Calculate accuracy score
    | Arguments : 
    |       prediction : Predicted values
    |       label      : Actual values
    | Return    :
    |       accuracy   : Accuracy score
    *---------------------------------------------------------*/
'''

def calculate_accuracy(prediction, label):
    """Compute the fraction of predictions that match the labels.

    Arguments:
        prediction : Predicted values; each is rounded to the closest integer
        label      : Actual values
    Return:
        Accuracy score as a tensor (matches divided by the length of the
        first dimension)
    """
    matches = (torch.round(prediction) == label).float()   # 1.0 where the rounded prediction equals the label
    return matches.sum() / len(matches)

### **Step 5.6: Calculate Epoch Elapsed Time**

In [None]:
''' 
    /*----------------------------- EPOCH_TIME_CALCULATION -------------
    | Function  : epoch_time()
    | Purpose   : Calculate time elapsed in each epoch
    | Arguments : 
    |        start_time   : Time when an epoch's execution starts
    |        end_time     : Time when an epoch's execution end
    | Return    :
    |        elapsed_mins : Time consumed by one epoch in minutes
    |        elapsed_secs : Time consumed by one epoch in seconds
    *---------------------------------------------------------*/
'''
def epoch_time(start_time, end_time):
    """Split the duration of one epoch into whole minutes and whole seconds.

    Arguments:
        start_time : Time (in seconds) when the epoch started
        end_time   : Time (in seconds) when the epoch ended
    Return:
        elapsed_mins : Whole minutes consumed by the epoch
        elapsed_secs : Remaining whole seconds
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)                         # Truncate to full minutes
    leftover_seconds = int(duration - (whole_minutes * 60))    # Seconds beyond the full minutes
    return whole_minutes, leftover_seconds

### **Step 5.7: Train Model** 

In [None]:
''' 
    /*----------------------------- TRAIN_MODEL -------------
    | Function  : train()
    | Purpose   : Train Model
    | Arguments : 
    |        model                 : Model object
    |        training_data_iterator: Training data iterator object
    |        optimizer             : Optimization algorithm
    |        criterion             : Loss funtion
    | Return    :
    |        epoch_loss            : Train data loss at each epoch
    |        epoch_accuracy        : Train data accuracy at each epoch
    *---------------------------------------------------------*/
'''
def train(model, iterator, optimizer, criterion):
    """Train the model for one epoch.

    Arguments:
        model     : Model object
        iterator  : Training data iterator; each batch exposes ``Text`` and ``Gender``
        optimizer : Optimization algorithm
        criterion : Loss function
    Return:
        Average loss and average accuracy over all batches of the epoch
    """
    model.train()                                                # Switch to training mode

    running_loss = 0
    running_accuracy = 0

    for batch in iterator:
        targets = batch.Gender

        optimizer.zero_grad()                                    # Reset gradients left over from the previous batch
        outputs = model(batch.Text).squeeze(1)                   # Predictions for this batch

        batch_loss = criterion(outputs, targets)
        batch_accuracy = calculate_accuracy(outputs, targets)

        batch_loss.backward()                                    # Backward propagation
        optimizer.step()                                         # Update the parameters

        running_loss += batch_loss.item()
        running_accuracy += batch_accuracy.item()

    batch_count = len(iterator)
    return running_loss / batch_count, running_accuracy / batch_count

### **Step 5.8: Save Model**

In [None]:
''' 
    /*----------------------------- SAVE_MODEL -------------
    | Function  : save_model()
    | Purpose   : Save a trained model on your hard disk
    | Arguments : 
    |        drive_path: Path to the directory where the trained model will be saved
    | Return    :
    |        Trained model will be saved on hard disk
    *---------------------------------------------------------*/

'''
def save_model(drive_path):
  """Save the weights of the global ``model`` to disk.

  Arguments:
      drive_path : Directory in which 'best-lstm-model.pt' is written
  """
  checkpoint_path = drive_path + '/best-lstm-model.pt'
  torch.save(model.state_dict(), checkpoint_path)

### **Evaluate Model**


*   **Function to be used in Validation and Test Phase**


In [None]:
''' 
    /*----------------------------- Evaluate_MODEL -------------
    | Function  : evaluate()
    | Purpose   : Function to be used in Validation and Test Phase
    | Arguments : 
    |        model                : Model object
    |        data_iterator:  Data iterator object
    | Return    :
    |        epoch_loss           : Data loss at each epoch
    |        epoch_accuracy       : Data accuracy at each epoch
    *---------------------------------------------------------*/
'''

def evaluate(model, iterator, criterion):
    """Measure loss and accuracy of the model on a dataset without updating it.

    Shared by the validation and testing phases.

    Arguments:
        model     : Model object
        iterator  : Data iterator; each batch exposes ``Text`` and ``Gender``
        criterion : Loss function
    Return:
        Average loss and average accuracy over all batches
    """
    model.eval()                      # Switch to evaluation mode

    total_loss = 0
    total_accuracy = 0

    with torch.no_grad():             # No gradients are needed for evaluation
        for batch in iterator:
            outputs = model(batch.Text).squeeze(1)
            batch_loss = criterion(outputs, batch.Gender)
            batch_accuracy = calculate_accuracy(outputs, batch.Gender)

            total_loss += batch_loss.item()
            total_accuracy += batch_accuracy.item()

    batch_count = len(iterator)
    return total_loss / batch_count, total_accuracy / batch_count

# **Step 6: Execute the Validation Phase**

In [None]:
''' 
    /*----------------------------- VALIDATE_MODEL -------------
    | Function  : validation()
    | Purpose   : Evalaute the performance of a trained  model
    | Arguments : 
    |        model                   : Model object
    |        validation_data_iterator: Validation data iterator object
    |        criterion               : Loss function
    | Return    :
    |        epoch_loss           : Validation data loss at each epoch
    |        epoch_accuracy       : Validation data accuracy at each epoch
    *---------------------------------------------------------*/
'''

def validation(model, validation_iterator, criterion):
      """Evaluate the model on the validation data and checkpoint it when it improves.

      The lowest validation loss seen so far is remembered on the model object
      itself (attribute ``_best_validation_loss``), so it survives across the
      per-epoch calls. Re-initialising it to infinity inside every call would
      make the comparison always succeed and overwrite the checkpoint with the
      latest epoch instead of the best one.

      Arguments:
          model               : Model object
          validation_iterator : Validation data iterator object
          criterion           : Loss function
      Return:
          validation_loss     : Validation data loss for this epoch
          validation_accuracy : Validation data accuracy for this epoch
      """
      validation_loss, validation_accuracy = evaluate(model, validation_iterator, criterion)     # Start model validation phase

      # A freshly created model has no recorded best loss yet, so it starts at infinity
      best_validation_loss = getattr(model, '_best_validation_loss', float('inf'))

      if validation_loss < best_validation_loss:
        model._best_validation_loss = validation_loss
        # NOTE: save_model() writes the weights of the global ``model`` to the global ``drive_path``
        save_model(drive_path)                                   # Save model on epoch where the validation loss is lowest
      return validation_loss, validation_accuracy

# **Step 7: Execute the Testing Phase**

### **Step 7.1: Load Saved Model**

In [None]:
"""
/*---------------------- LOAD_SAVED_MODEL ----------
|  Function  : load_model()
|  Purpose   : Method to load previously saved model
|  Arguments :
|       drive_path : Path of directory where model is saved
|  Return    :
|              Saved model will be loaded in memory
*---------------------------------------------------------*/
"""
def load_model(drive_path):
  """Load the saved weights from disk into the global ``model``.

  Arguments:
      drive_path : Directory that contains 'best-lstm-model.pt'
  Return:
      The result of ``model.load_state_dict``
  """
  saved_weights = torch.load(drive_path + '/best-lstm-model.pt')
  return model.load_state_dict(saved_weights)

### **Step 7.2: Test Model**

In [None]:
''' 
    /*----------------------------- TEST_MODEL -------------
    | Function  : test()
    | Purpose   : Evalaute the performance of a trained  model
    | Arguments : 
    |        model                : Model object
    |        testing_data_iterator: Test data iterator object
    |        criterion            : Loss function
    | Return    :
    |        epoch_loss           : Test data loss at each epoch
    |        epoch_accuracy       : Test data accuracy at each epoch
    *---------------------------------------------------------*/
'''

def test(model, testing_iterator, criterion):
  """Evaluate the saved model on the testing data.

  The weights are first reloaded from the checkpoint under the global
  ``drive_path``.

  Arguments:
      model            : Model object
      testing_iterator : Test data iterator object
      criterion        : Loss function
  Return:
      Test data loss and test data accuracy
  """
  load_model(drive_path)                 # Restore the checkpointed weights
  loss_on_test, accuracy_on_test = evaluate(model, testing_iterator, criterion)
  return loss_on_test, accuracy_on_test

# **Step 8: Execute the Application Phase**

### **Step 8.1: Take Input from User and Convert it into Feature Vector Same as Training Data**

In [None]:
''' 
    /*----------------------------- USER_INPUT -------------
    | Function  : take_user_input()
    | Purpose   : Take unseen input from user
    | Arguments : 
    |        TEXT : Field object to apply pre-processing on input text (same as sample data)
    | Return    :
    |        user_comment_tensor : User input in machine understandable format
    |----------------------------------------------------------
    | - Let us now predict the gender on a single comment for the real time evaluation purpose 
    | 1 : Take input from user
    | 2 : Preprocess the user input
    | 3 : Fit vocabulary previously made for sample data on user input. The indexes assigned for words in 
    |     sample data will be assigned to user input. Words in user input that does not appear in 
    |     sample data will have zero value
    | 4 : Convert user input to an array
    | 5 : Make tensor from array. As pytorch only work with tensors
    *---------------------------------------------------------*/

'''

def take_user_input(TEXT):
  """Read a comment from the user and convert it into a tensor of vocabulary indices.

  Steps:
      1. Take input from the user
      2. Pre-process it with the TEXT field (same as the sample data)
      3. Wrap it in the field's init and eos tokens
      4. Look up the vocabulary index of every token
      5. Build a column tensor of shape (number_of_tokens, 1)

  Arguments:
      TEXT : Field object used to pre-process and index the input text
  Return:
      User input as a LongTensor
  """
  raw_comment = input("Enter comment: ")

  # Same pre-processing as the sample data, framed by the start/end markers
  tokens = [TEXT.init_token] + TEXT.preprocess(raw_comment) + [TEXT.eos_token]
  token_ids = [TEXT.vocab.stoi[token] for token in tokens]
  comment_tensor = torch.LongTensor(np.asarray(token_ids)).unsqueeze(1)

  print("\nPreprocessed User_input:\n==========================")
  print(tokens)
  print("\nIdx stored in vocab, corresponding to each word in user_input:\n==========================")
  print(token_ids)
  print("\nUser_input as a tensor:\n==========================" )
  print(comment_tensor)

  return comment_tensor

### **Step 8.2: Load Saved Model**

In [None]:
"""
/*---------------------- LOAD_SAVED_MODEL ----------
|  Function  : load_model()
|  Purpose   : Method to load previously saved model
|  Arguments :
|       drive_path : Path of directory where model is saved
|  Return    :
|              Saved model will be loaded in memory
*---------------------------------------------------------*/
"""
def load_model(drive_path):
  """Load the saved weights from disk into the global ``model``.

  Arguments:
      drive_path : Directory that contains 'best-lstm-model.pt'
  Return:
      The result of ``model.load_state_dict``
  """
  checkpoint_path = drive_path + '/best-lstm-model.pt'
  state = torch.load(checkpoint_path)
  return model.load_state_dict(state)

### **Step 8.3: Model Prediction**

In [None]:
"""
/*----------------------- MODEL_PREDICTION --------
|  Function  : model_prediction()
|  Purpose   : Use trained model to predict the output of unseen instances
|  Arguments : 
|       user_input : Input taken from user
|       drive_path : Path of the directory where trained model is saved
|  Return    : 
|       Gender     : Prediction 
*--------------------------------------------------
|   1. Set the model to evaluation mode
|   2. Set all the gradients to zero
|   3. Apply trained model on user input
|   
|   4. torch.round() : 
|         Return the value rounded to the closest integer (0 or 1)
|   5. If returned output is 1 Print "Male" else "Female"
*-------------------------------------------------*/   
"""

def model_predictions(user_input, drive_path):
  """Predict the gender for a single user input with the saved model.

  The checkpoint is loaded into the global ``model``, the model is switched
  to evaluation mode and applied to the input with gradients disabled. The
  output is rounded to the closest integer: 1 is reported as "Male",
  anything else as "Female".

  Arguments:
      user_input : Input taken from the user, as a tensor
      drive_path : Path of the directory where the trained model is saved
  Return:
      "Male" or "Female"
  """
  load_model(drive_path)      # Restore the trained weights
  model.eval()
  print("\nUser_input as embedding vectors:\n==========================" )
  with torch.no_grad():
    probability = model(user_input)

  return "Male" if torch.round(probability) == 1 else "Female"

# **Main Function**

In [None]:

# Main driver cell: trains and validates for number_of_epochs, then tests the
# saved model and finally predicts the gender for one comment typed by the user.
print("\n+=====================Execute the Training and Validation Phase=====================+\n\n")
# Step 5: Execute the Training Phase

for epoch in range(number_of_epochs):

    start_time = time.time()                                    # Start time when one epoch will start executing

    training_loss, training_accuracy     = train(model, training_iterator, optimizer, criterion)   # Start model training

    # Step 6: Execute the Validation Phase
    validation_loss, validation_accuracy = validation(model, validation_iterator, criterion)

    end_time = time.time()                                       # End time when one epoch will end executing
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)    # Calculate time consumed by one epoch (in minutes and seconds)

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTraining Loss: {training_loss:.3f}   | Training Accuracy: {training_accuracy*100:.2f}%')
    print(f'\tValidation Loss: {validation_loss:.3f} |  Validation Accuracy: {validation_accuracy*100:.2f}%')

print("\n+=====================Execute the Testing Phase=====================+\n\n")
# Step 7: Execute the Testing Phase

testing_loss, testing_accuracy = test(model, testing_iterator, criterion)
print(f'Testing Loss: {testing_loss:.3f} | Testing Accuracy: {testing_accuracy*100:.2f}%')

print("\n+===================Execute the Application Phase===================+\n\n")
# Step 8: Execute the Application Phase

user_input = take_user_input(TEXT)   # Take unseen input from user
Gender = model_predictions(user_input, drive_path)  # Make trained model predictions on user input
# '\033[1m' is the ANSI escape sequence for bold text
print('\033[1m',"\n\nTrained Model Prediction")
print('\033[1m',"+","="*30,"+")
print('\033[1m',"|"," "*30,"|\n           Gender : ", Gender,"        \n","|                                |")
print('\033[1m',"+","="*30,"+")




Epoch: 01 | Epoch Time: 0m 0s
	Training Loss: 0.725   | Training Accuracy: 52.78%
	Validation Loss: 0.727 |  Validation Accuracy: 25.00%
Epoch: 02 | Epoch Time: 0m 0s
	Training Loss: 0.724   | Training Accuracy: 50.00%
	Validation Loss: 0.727 |  Validation Accuracy: 25.00%
Epoch: 03 | Epoch Time: 0m 0s
	Training Loss: 0.724   | Training Accuracy: 52.78%
	Validation Loss: 0.727 |  Validation Accuracy: 25.00%
Epoch: 04 | Epoch Time: 0m 0s
	Training Loss: 0.724   | Training Accuracy: 52.78%
	Validation Loss: 0.727 |  Validation Accuracy: 25.00%
Epoch: 05 | Epoch Time: 0m 0s
	Training Loss: 0.724   | Training Accuracy: 50.00%
	Validation Loss: 0.727 |  Validation Accuracy: 25.00%
Epoch: 06 | Epoch Time: 0m 0s
	Training Loss: 0.724   | Training Accuracy: 47.22%
	Validation Loss: 0.727 |  Validation Accuracy: 25.00%
Epoch: 07 | Epoch Time: 0m 0s
	Training Loss: 0.724   | Training Accuracy: 50.00%
	Validation Loss: 0.727 |  Validation Accuracy: 0.00%
Epoch: 08 | Epoch Time: 0m 0s
	Training

# **Step 9: Execute the Feedback Phase**

A Two Step Process
*   Step 1: After some time, take Feedback from
  * Domain Experts and Users on the deployed Gender Prediction System
*   Step 2: Make a List of Possible Improvements based on Feedback received


# **Step 10: Improve Model based on Feedback**
* There is Always Room for Improvement 😊
* Based on Feedback from Domain Experts and Users
  * Improve your Model 
