**Initialization**
* I use these 3 lines of code on top of my each Notebooks because it will help to prevent any problems while reloading and reworking on a same Project or Problem. And the third line of code helps to make visualization within the Notebook.

In [None]:
#@ Initialization:
%reload_ext autoreload
%autoreload 2
%matplotlib inline 

**Downloading the Libraries and Dependencies**
* I have downloaded all the Libraries and Dependencies required for this Project in one particular cell.

In [None]:
#@ Downloading the Libraries and Dependencies:
import torch
import pandas as pd
import numpy as np
import collections
import re, json, string, os

from argparse import Namespace
from IPython.display import display
from collections import Counter
import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm_notebook

**Getting the Data**
* I have used Google Colab for this Project so the process of downloading and reading the Data might be different in other platforms. I have used **The Surname Dataset** which is a collection of 10000 surnames from 18 different Nationalities collected from different name sources on the Internet. The first property of this Dataset is that it is fairly Imbalanced. The second property is that there is a valid and intuitive relationships between Nationality origin and Surname Orthography. 

In [None]:
#@ Getting the Dataset:
args = Namespace(
    raw_dataset = "/content/drive/My Drive/Colab Notebooks/Surname/surnames.csv",
    train_proportion = 0.7,
    val_proportion = 0.15,
    test_proportion = 0.15,
    output_munged = "/content/drive/My Drive/Colab Notebooks/Surname/surnames_with_splits.csv",
    seed = 42
)

#@ Reading the Raw Dataset:
surnames = pd.read_csv(args.raw_dataset, header=0)
display(surnames.head(10))                                                                          # Inspecting the DataFrame.
print("\nUnique Classes:")
display(set(surnames["nationality"]))                                                               # Inspecting the Unique classes in the Dataset.

Unnamed: 0,surname,nationality
0,Woodford,English
1,Coté,French
2,Kore,English
3,Koury,Arabic
4,Lebzak,Russian
5,Obinata,Japanese
6,Rahal,Arabic
7,Zhuan,Chinese
8,Acconci,Italian
9,Mifsud,Arabic



Unique Classes:


{'Arabic',
 'Chinese',
 'Czech',
 'Dutch',
 'English',
 'French',
 'German',
 'Greek',
 'Irish',
 'Italian',
 'Japanese',
 'Korean',
 'Polish',
 'Portuguese',
 'Russian',
 'Scottish',
 'Spanish',
 'Vietnamese'}

**Processing the Dataset** 

In [None]:
#@ Splitting the Dataset on the basis of Nationality:
by_nationality = collections.defaultdict(list)                      # Collection stores the collection of Data.
for _, row in surnames.iterrows():
  by_nationality[row.nationality].append(row.to_dict())             # Creating the Dictionary.

#@ Creating the Split Data:
final_list = []
np.random.seed(args.seed)
for _, item_list in sorted(by_nationality.items()):
  np.random.shuffle(item_list)                                      # Shuffling the Data randomly.
  n = len(item_list)                                                # Number of Items.
  n_train = int(args.train_proportion * n)                          # Number of Training Dataset.
  n_val = int(args.val_proportion * n)                              # Number of Validation Dataset.
  n_test = int(args.test_proportion * n)                            # Number of Testing Dataset.
  #@ Giving the Data point a Split Attribute:
  for item in item_list[:n_train]:
    item["split"] = "train"                                         # Training Dataset.
  for item in item_list[n_train:n_train+n_val]:
    item["split"] = "val"                                           # Validation Dataset.
  for item in item_list[n_train+n_val:n_train+n_val+n_test]:
    item["split"] = "test"                                          # Testing Dataset.
  #@ Adding to the Final List:
  final_list.extend(item_list)

#@ Final Split of the Data and Creating the Final DataFrame:
final_surnames = pd.DataFrame(final_list)

#@ Inspecting the Final DataFrame:
display(final_surnames.split.value_counts())                         # Inspecting the Training, Validation and the Testing Data.
print(" ")
display(final_surnames.head())                                       # Inspecting the Final DataFrame.

train    7680
val      1640
test     1640
Name: split, dtype: int64

 


Unnamed: 0,surname,nationality,split
0,Guirguis,Arabic,train
1,Shamon,Arabic,train
2,Nader,Arabic,train
3,Kassis,Arabic,train
4,Bahar,Arabic,train


In [None]:
#@ Preparing the Final Data:
final_surnames.to_csv(args.output_munged, index=False)

**Surname Dataset Class**
* PyTorch provides an abstraction for the Dataset by providing a Dataset Class. The Dataset Class is an abstract Operator. When using PyTorch with a new Dataset it is necessary to sub class the Dataset Class and Implement the getitem and len methods. I will implement two functions: the getitem method which returns a Data point when given an index and len method returns the length of the Dataset.

In [None]:
#@ Implementing the Surname Dataset Class:
class SurnameDataset(Dataset):
  def __init__(self, surname_df, vectorizer):
    """
    Args: surname_df(pandas DataFrame): The Dataset.
        : vectorizer(SurnameVectorizer): Vectorizer Instantiated.
    """
    self.surname_df = surname_df
    self._vectorizer = vectorizer 

    self.train_df = self.surname_df[self.surname_df.split == "train"]
    self.train_size = len(self.train_df)

    self.val_df = self.surname_df[self.surname_df.split == "val"]
    self.validation_size = len(self.val_df)

    self.test_df = self.surname_df[self.surname_df.split == "test"]
    self.test_size = len(self.test_df)

    self._lookup_dict = {"train": (self.train_df, self.train_size),
                         "val": (self.val_df, self.validation_size),
                         "test": (self.test_df, self.test_size)}
    
    self.set_split("train")

    #@ Dataset Class Weights:
    class_counts = surname_df.nationality.value_counts().to_dict()
    def sort_key(item):
      return self._vectorizer.nationality_vocab.lookup_token(item[0])
    sorted_counts = sorted(class_counts.items(), key=sort_key)
    frequencies = [count for _, count in sorted_counts]
    self.class_weights = 1.0 / torch.tensor(frequencies, dtype=torch.float32)

  @classmethod
  def load_dataset_and_make_vectorizer(cls, surname_csv):
    """Load Dataset and make Vectorizer from scratch.
    Args: surname_csv: Location of the Dataset.
    Returns: An instance of the SurnameDataset.
    """
    surname_df = pd.read_csv(surname_csv)
    train_surname_df = surname_df[surname_df.split == "train"]
    return cls(surname_df, SurnameVectorizer.from_dataframe(train_surname_df))
  
  @classmethod
  def load_dataset_and_load_vectorizer(cls, surname_csv, vectorizer_filepath):
    """Load Dataset and corresponding Vectorizer.
    Args: surname_csv: Location of the Dataset.
        : vectorizer_filepath: Location of the saved Vectorizer.
    Returns: An instance of the SurnameDataset.
    """
    surname_df = pd.read_csv(surname_csv)
    vectorizer = cls.load_vectorizer_only(vectorizer_filepath)
    return cls(surname_df, vectorizer)
  
  @staticmethod
  def load_vectorizer_only(vectorizer_filepath):
    """A static method for loading the Vectorizer from file.
    Args: vectorizer_filepath: The location of the serialized vectorizer.
    Returns: An instance of SurnameVectorizer.
    """
    with open(vectorizer_filepath) as fp:
      return SurnameVectorizer.from_serializable(json.load(fp))
  
  def save_vectorizer(self, vectorizer_filepath):
    """Saves the Vectorizer to disk using json.
    Args: vectorizer_filepath: The location to save the Vectorizer.
    """
    with open(vectorizer_filepath, "w") as fp:
      json.dump(self._vectorizer.to_serializable(), fp)
  
  def get_vectorizer(self):
    return self._vectorizer
  
  def set_split(self, split="train"):
    self._target_split = split
    self._target_df, self._target_size = self._lookup_dict[split]
  
  def __len__(self):
    return self._target_size
  
  def __getitem__(self, index):
    row = self._target_df.iloc[index]
    surname_vector = self._vectorizer.vectorize(row.surname)
    nationality_index = self._vectorizer.nationality_vocab.lookup_token(row.nationality)
    return {"x_surname": surname_vector,
            "y_nationality": nationality_index}
  
  def get_num_batches(self, batch_size):
    return len(self) // batch_size
  
def generate_batches(dataset, batch_size, shuffle=True,
                       drop_last=True, device="cpu"):
  dataloader = DataLoader(dataset=dataset, batch_size=batch_size, 
                          shuffle=shuffle, drop_last=drop_last)
  for data_dict in dataloader:
    out_data_dict = {}
    for name, tensor in data_dict.items():
      out_data_dict[name] = data_dict[name].to(device)
    yield out_data_dict

**The Vocabulary Class**
* The Vocabulary is the coordination of two Python Dictionaries that form a bijection between tokens or characters here and integers. The first dictionary maps characters to integers indices and the second maps the integers indices to characters. The add_token method is used to add new tokens into the Vocabulary and look_up method is used to retrieve an index and lookup_index is used to retrieve a token given an index.

In [None]:
#@ The Vocabulary Class:
class Vocabulary(object):
  """ Class to process text and extract Vocabulary for mapping. """
  def __init__(self, token_to_idx=None, add_unk=True, unk_token="<UNK>"):
    """
    Args: token_to_idx(dict): Pre existing map of Tokens to Index.
        : add_unk(bool): A flag indicating whether to add UNK Token.
        : unk_token(string): The UNK Token to add in Vocabulary.
    """
    if token_to_idx is None:
      token_to_idx = {}
    self._token_to_idx = token_to_idx
    self._idx_to_token = {idx:token for token, idx in self._token_to_idx.items()}
    self._add_unk = add_unk
    self._unk_token = unk_token

    self.unk_index = -1
    if add_unk:
      self.unk_index = self.add_token(unk_token)
    
  def to_serializable(self):
    """Returns a dictionary that can be serialized.
    """
    return {"token_to_idx":self._token_to_idx,
            "add_unk":self._add_unk,
            "unk_token":self._unk_token}
  
  @classmethod
  def from_serializable(cls, contents):
    return cls(**contents)
  
  def add_token(self, token):
    """Update the mapping dictionary based on the Tokens.
    Args: token: The item to add into the Vocabulary.
    Returns: index: Integer corresponding to the Token.
    """
    try:
      index = self._token_to_idx[token]
    except KeyError:
      index = len(self._token_to_idx)
      self._token_to_idx[token] = index
      self._idx_to_token[index] = token
    return index
  
  def add_many(self, tokens):
    """Add a list of Tokens into Vocabulary.
    Args: tokens(list): A list of string Tokens.
    Returns: indices(list): A list of indices correspoinding to the Tokens.
    """
    return [self.add_token(token) for token in tokens]
  
  def lookup_token(self, token):
    """Retrieve the Index associated with the Token.
    Args: token(str): The Token to lookup.
    Returns: index(int): The Index correspoinding to the Token.
    """
    if self.unk_index >= 0:
      return self._token_to_idx.get(token, self.unk_index)
    else:
      return self._token_to_idx[token]
  
  def lookup_index(self, index):
    """Return the Token associated with the Index.
    Args: index(int): The Index to lookup.
    Returns: token(str): The Token correspoinding to the Index.
    """
    if index not in self._idx_to_token:
      raise KeyError("the index (%d) is not in the vocabulary" % index)
    return self._idx_to_token[index]
  
  def __str__(self):
    return "<Vocabulary(size=%d)>" % len(self)
  
  def __len__(self):
    return len(self._token_to_idx)

**The Vectorizer Class**
* The Vocabulary converts individual tokens into Integers and The Surname Vectorizer is responsible for applying the Vocabulary and converting surname into Vector. Surnames are sequence of characters and each character is an individual token in the Vocabulary. 

In [None]:
#@ The Vectorizer Class:
class SurnameVectorizer(object):
  """The Vectorizer coordinates the Vocabularies and puts them to use.
  """
  def __init__(self, surname_vocab, nationality_vocab):
    self.surname_vocab = surname_vocab
    self.nationality_vocab = nationality_vocab
  
  def vectorize(self, surname):
    """
    Args: surname: The Surname
    Returns: A collapsed one hot Encoding.
    """
    vocab = self.surname_vocab
    one_hot = np.zeros(len(vocab), dtype=np.float32)
    for token in surname:
      one_hot[vocab.lookup_token(token)] = 1
    return one_hot
  
  @classmethod
  def from_dataframe(cls, surname_df):
    surname_vocab = Vocabulary(unk_token="@")
    nationality_vocab = Vocabulary(add_unk=False)

    for index, row in surname_df.iterrows():
      for letter in row.surname:
        surname_vocab.add_token(letter)
      nationality_vocab.add_token(row.nationality)

    return cls(surname_vocab, nationality_vocab)
  
  @classmethod
  def from_serializable(cls, contents):
    surname_vocab = Vocabulary.from_serializable(contents['surname_vocab'])
    nationality_vocab =  Vocabulary.from_serializable(contents['nationality_vocab'])
    return cls(surname_vocab=surname_vocab, nationality_vocab=nationality_vocab)

  def to_serializable(self):
    return {'surname_vocab': self.surname_vocab.to_serializable(),
            'nationality_vocab': self.nationality_vocab.to_serializable()}

**The Model: Surname Classifier**
* The Surname Classifier is an Implementation of the Multi Layer Perceptron. The first Linear Layer maps the input vectors to an intermediate vector and the non linearity is applied to that vector. A second Linear Layer maps the Intermediate vector to the Prediction vector. In the last step the Softmax Function is optionally applied to make sure the outputs sum to 1 which is interpreted as Probabilities.

In [None]:
#@ The Surname Classifier using an MLP:
class SurnameClassifier(nn.Module):
  """ A Multi Layer Perceptron for classifying Surnames. """
  def __init__(self, input_dim, hidden_dim, output_dim):
    """
    Args: input_dim(int): The size of the Input Vectors.
        : hidden_dim(int): The output size of the First Linear Layer.
        : output_dim(int): The output size of the Second Lienar Layer.
    """
    super(SurnameClassifier, self).__init__()
    self.fc1 = nn.Linear(input_dim, hidden_dim)
    self.fc2 = nn.Linear(hidden_dim, output_dim)
  
  def forward(self, x_in, apply_softmax=False):
    """ The Forward pass of the Classifier. """
    intermediate_vector = F.relu(self.fc1(x_in))
    prediction_vector = self.fc2(intermediate_vector)

    if apply_softmax:
      prediction_vector = F.softmax(prediction_vector, dim=1)
    return prediction_vector

**The Training Routine**
* The Training Routine is responsible for instantiating the Model, iterating over the Dataset, computing the output of the Model when the given data as Input, computing the Loss and updating the Model proportional to the Loss.

In [None]:
#@ Helper Functions for Training Routine:
def make_train_state(args):
  return {"stop_early": False,
          "early_stopping_step": 0,
          "early_stopping_best_val": 1e8,
          "learning_rate": args.learning_rate,
          "epoch_index": 0,
          "train_loss": [],
          "train_acc": [],
          "val_loss": [],
          "val_acc": [],
          "test_loss": -1,
          "test_acc": -1,
          "model_filename": args.model_state_file}

def update_train_stage(args, model, train_state):
  """ Handles the Training state Updates. """
  #@ Saving atleast one Model:
  if train_state["epoch_index"] == 0:
    torch.save(model.state_dict(), train_state["model_filename"])
    train_state["stop_early"] = False
  #@ Saving the Model if performance is improved:
  elif train_state["epoch_index"] >= 1:
    loss_tm1, loss_t = train_state["val_loss"][-2:]
    #@ If the loss is worsened:
    if loss_t >= train_state["early_stopping_best_val"]:
      train_state["early_stopping_step"] += 1
    else:
      if loss_t < train_state["early_stopping_best_val"]:
        torch.save(model.state_dict(), train_state["model_filename"])
      train_state["early_stopping_step"] = 0
    train_state["stop_early"] = train_state["early_stopping_step"] >= args.early_stopping_criteria
  return train_state

def compute_accuracy(y_pred, y_target):
  _, y_pred_indices = y_pred.max(dim=1)
  n_correct = torch.eq(y_pred_indices, y_target).sum().item()
  return n_correct / len(y_pred_indices) * 100

In [None]:
#@ General Utilities:
def set_seed_everywhere(seed, cuda):
  np.random.seed(seed)
  torch.manual_seed(seed)
  if cuda:
    torch.cuda.manual_seed_all(seed)

def handle_dirs(dirpath):
  if not os.path.exists(dirpath):
    os.makedirs(dirpath)

In [None]:
#@ Hyperparameters and Program Options:
args = Namespace(
    #@ Data and path Information:
    surname_csv = "/content/drive/My Drive/Colab Notebooks/Surname/surnames_with_splits.csv",
    vectorizer_file = "vectorizer.json",
    model_state_file = "model.pth",
    save_dir = "model_storage/surname_mlp",
    #@ Model Hyperparameters:
    hidden_dim = 300,
    #@ Training Hyperparameters:
    seed = 42,
    num_epochs = 100,
    early_stopping_criteria = 5,
    learning_rate = 0.001,
    batch_size = 128,
    #@ Runtime Options:
    cuda = True,
    reload_from_files = False,
    expand_filepaths_to_save_dir = True
)

if args.expand_filepaths_to_save_dir:
  args.vectorizer_file = os.path.join(args.save_dir, args.vectorizer_file)
  args.model_state_file = os.path.join(args.save_dir, args.model_state_file)
  print("Expanded Filepaths: ")
  print("\t{}".format(args.vectorizer_file))
  print("\t{}".format(args.model_state_file))

#@ Checking the CUDA:
if not torch.cuda.is_available():
  args.cuda = False
args.device = torch.device("cuda" if args.cuda else "cpu")
print("Using CUDA: {}".format(args.cuda))

#@ Set seed for Reproducibility:
set_seed_everywhere(args.seed, args.cuda)

#@ Handle dirs:
handle_dirs(args.save_dir)

Expanded Filepaths: 
	model_storage/surname_mlp/vectorizer.json
	model_storage/surname_mlp/model.pth
Using CUDA: True


In [None]:
#@ Initializing the Data Training:
if args.reload_from_files:
  #@ Training from a checkpoint:
  print("Reloading!")
  dataset = SurnameDataset.load_dataset_and_load_vectorizer(
      args.surname_csv,
      args.vectorizer_file
  )
else:
  #@ Creating Dataset and Vectorizer:
  print("Creating Fresh!")
  dataset = SurnameDataset.load_dataset_and_make_vectorizer(
      args.surname_csv
  )
  dataset.save_vectorizer(args.vectorizer_file)

vectorizer = dataset.get_vectorizer()
classifier = SurnameClassifier(input_dim=len(vectorizer.surname_vocab),
                               hidden_dim=args.hidden_dim,
                               output_dim=len(vectorizer.nationality_vocab)) 

classifier = classifier.to(args.device)
dataset.class_weights = dataset.class_weights.to(args.device)
loss_func = nn.CrossEntropyLoss(dataset.class_weights)
optimizer = optim.Adam(classifier.parameters(), lr=args.learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer = optimizer,
    mode="min", factor=0.5,
    patience=1
)

Creating Fresh!


**The Training Loop**
* Using the Training Data, computes the Model Output, Loss and the Gradients. Then the Gradient is used to update the Model.

In [None]:
#@ The Training Loop:
train_state = make_train_state(args)

epoch_bar = tqdm_notebook(desc="training_routine",
                          total=args.num_epochs,
                          position=0)

dataset.set_split("train")
train_bar = tqdm_notebook(desc="split=train",
                          total=dataset.get_num_batches(args.batch_size),
                          position=1,
                          leave=True)

dataset.set_split("val")
val_bar = tqdm_notebook(desc="split=val",
                        total=dataset.get_num_batches(args.batch_size),
                        position=1,
                        leave=True)

try:
  for epoch_index in range(args.num_epochs):
    train_state["epoch_index"] = epoch_index

    #@ Iterate over the Training Dataset:
    dataset.set_split("train")
    batch_generator = generate_batches(dataset,
                                       batch_size = args.batch_size,
                                       device = args.device)
    running_loss = 0.0
    running_acc = 0.0
    classifier.train()

    for batch_index, batch_dict in enumerate(batch_generator):
      #@ Step1: Zero Gradients:
      optimizer.zero_grad()
      #@ Step2: Computing the Output:
      y_pred = classifier(batch_dict["x_surname"])
      #@ Step3: Computing the Loss:
      loss = loss_func(y_pred, batch_dict["y_nationality"])
      loss_t = loss.item()
      running_loss += (loss_t - running_loss) / (batch_index + 1)
      #@ Step4: Using loss to produce Gradients:
      loss.backward()
      #@ Step5: Using optimizer to take Gradient steps:
      optimizer.step()
      #@ Computing the accuracy:
      acc_t = compute_accuracy(y_pred, batch_dict["y_nationality"])
      running_acc += (acc_t - running_acc) / (batch_index + 1)
      #@ Updating:
      train_bar.set_postfix(loss=running_loss, acc=running_acc, epoch=epoch_index)
      train_bar.update()
    train_state["train_loss"].append(running_loss)
    train_state["train_acc"].append(running_acc)

    #@ Iterate over Validation Dataset:
    dataset.set_split("val")
    batch_generator = generate_batches(dataset, 
                                       batch_size = args.batch_size,
                                       device = args.device)
    running_loss = 0.0
    running_acc = 0.0
    classifier.eval()

    for batch_index, batch_dict in enumerate(batch_generator):
      #@ Step1: Zero Gradients:
      optimizer.zero_grad()
      #@ Step2: Computing the Output:
      y_pred = classifier(batch_dict["x_surname"])
      #@ Step3: Computing the Loss:
      loss = loss_func(y_pred, batch_dict["y_nationality"])
      loss_t = loss.to("cpu").item()
      running_loss += (loss_t - running_loss) / (batch_index + 1)
      #@ Computing the accuracy:
      acc_t = compute_accuracy(y_pred, batch_dict["y_nationality"])
      running_acc += (acc_t - running_acc) / (batch_index + 1)
      #@ Updating:
      val_bar.set_postfix(loss=running_loss, acc=running_acc, epoch=epoch_index)
      val_bar.update()
    train_state["val_loss"].append(running_loss)
    train_state["val_acc"].append(running_acc)

    train_state = update_train_stage(args=args, model=classifier, train_state=train_state)
    scheduler.step(train_state["val_loss"][-1])

    if train_state["stop_early"]:
      break
    train_bar.n = 0
    val_bar.n = 0
    epoch_bar.update()
except KeyboardInterrupt:
  print("Exiting Loop!")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, description='training_routine', style=ProgressStyle(description_width=…

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if sys.path[0] == '':


HBox(children=(FloatProgress(value=0.0, description='split=train', max=60.0, style=ProgressStyle(description_w…

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, description='split=val', max=12.0, style=ProgressStyle(description_wid…

In [16]:
#@ Computing the Loss and Accuracy on the Test Dataset:
classifier.load_state_dict(torch.load(train_state["model_filename"]))
classifier = classifier.to(args.device)
dataset.class_weights = dataset.class_weights.to(args.device)
loss_func = nn.CrossEntropyLoss(dataset.class_weights)

dataset.set_split("test")
batch_generator = generate_batches(dataset,
                                   batch_size=args.batch_size,
                                   device=args.device)
running_acc=0.0
running_loss=0.0
classifier.eval()

for batch_index, batch_dict in enumerate(batch_generator):
  #@ Computing the Output:
  y_pred = classifier(batch_dict["x_surname"])
  #@ Computing the Loss:
  loss = loss_func(y_pred, batch_dict["y_nationality"])
  loss_t = loss.item()
  running_loss += (loss_t - running_loss) / (batch_index + 1)
  #@ Computing the accuracy:
  acc_t = compute_accuracy(y_pred, batch_dict["y_nationality"])
  running_acc += (acc_t - running_acc) / (batch_index + 1)

train_state["test_loss"] = running_loss
train_state["test_acc"] = running_acc

print("Test Loss: {}".format(train_state["test_loss"]))
print("Test Accuracy: {}".format(train_state["test_acc"]))

Test Loss: 1.7937180300553641
Test Accuracy: 44.791666666666664


In [25]:
#@ Inferences:
def predict_nationality(name, classifier, vectorizer, k=5):
  vectorized_name = vectorizer.vectorize(name)
  vectorized_name = torch.tensor(vectorized_name).view(1, -1)
  prediction_vector = classifier(vectorized_name, apply_softmax=True)
  probability_values, indices = torch.topk(prediction_vector, k=k)

  probability_values = probability_values.detach().numpy()[0]
  indices = indices.detach().numpy()[0]

  results = []
  for prob_value, index in zip(probability_values, indices):
    nationality = vectorizer.nationality_vocab.lookup_index(index)
    results.append({"nationality": nationality,
                   "probability": prob_value})
  return results

new_surname = input("Enter the Surname: ")
classifier = classifier.to("cpu")
k = int(input("How many predictions you wish to see: "))
if k > len(vectorizer.nationality_vocab):
  print("Sorry!")
  k = len(vectorizer.nationality_vocab)

predictions = predict_nationality(new_surname, classifier, vectorizer, k=k)
print("Top {} Predictions: ".format(k))
print(" ")
for prediction in predictions:
  print("{} ----> {}(Prob={:.2f})".format(new_surname, prediction["nationality"], prediction["probability"]))

Enter the Surname: Tamang
How many predictions you wish to see: 6
Top 6 Predictions: 
 
Tamang ----> Vietnamese(Prob=0.19)
Tamang ----> Arabic(Prob=0.17)
Tamang ----> Japanese(Prob=0.14)
Tamang ----> English(Prob=0.10)
Tamang ----> Irish(Prob=0.10)
Tamang ----> Chinese(Prob=0.07)
