# Introduction

### In this notebook I will walk you through the steps needed to generate names from a given dataset.
#### I used [Anime names and Images](https://www.kaggle.com/shanmukh05/anime-names-and-image-generation) dataset for this task.
*   This is basically an RNN that takes a sequence of encoded characters as input and outputs the next letter. You can generate an anime name of any length you want.

## **Let's dive in**

In [None]:
import tensorflow as tf
import numpy as np
import os
import pandas as pd

# Loading Data

In [None]:
# Location of the names CSV inside the Kaggle input directory.
path = "../input/anime-names-and-image-generation/final_names.csv"

# Read the CSV and pull the column labelled "0" out as a plain Python list.
names_df = pd.read_csv(path)
names_ls = names_df["0"].tolist()
print("Number of names: ", len(names_ls))

# Preview the first rows of the raw frame.
names_df.head()

# Preprocessing Data

**In the cell below, I do two things:**
* Convert every anime name into ids
* Convert the ids back into names

Both steps are done using `tf.keras.layers.experimental.preprocessing.StringLookup`.

In [None]:
# Concatenate every name into one string so the character vocabulary can be
# derived from it. str.join avoids the quadratic cost of repeated `+=`.
text = "".join(names_ls)

# Sort the character set: the iteration order of a set of strings changes
# between interpreter runs, which would re-assign character ids on every
# kernel restart and make previously saved checkpoints unusable.
unique_chars = sorted(set(text))
num_chars = len(unique_chars)
print("Number of unique characters: ", num_chars)

# Forward lookup (character -> id) and its inverse (id -> character).
# mask_token="" reserves id 0 for the empty token, so the zeros appended by
# the later pad_sequences step never alias a real character or the OOV id.
# The default for mask_token differs across TensorFlow releases, so it is
# pinned explicitly on both layers.
char_to_id = tf.keras.layers.experimental.preprocessing.StringLookup(
    vocabulary=unique_chars, mask_token="")
id_to_char = tf.keras.layers.experimental.preprocessing.StringLookup(
    vocabulary=char_to_id.get_vocabulary(), mask_token="", invert=True)

# Per-name views: split into characters, re-joined byte strings, and id sequences.
char_ls = tf.strings.unicode_split(names_ls, input_encoding="UTF-8")
name_byte_ls = list(tf.strings.reduce_join(char_ls, axis=-1))
id_ls = list(char_to_id(char_ls))
# Ids of the whole concatenated corpus, used only for the character count below.
all_ids = char_to_id(tf.strings.unicode_split(text, 'UTF-8'))

print("Example name: ", name_byte_ls[0])
print("Split: ", char_ls[0])
print("It's encoding: ", id_ls[0])

print("Number of chars in all names: ", tf.shape(all_ids)[0])

# Size of the lookup vocabulary, including the special tokens.
len(char_to_id.get_vocabulary())

### Padding sequences

In [None]:
# Pad every id sequence at the end ("post") so all names share one length.
padded_id = tf.keras.preprocessing.sequence.pad_sequences(
    id_ls,
    padding="post",
)
print("Padded ids shape: ", padded_id.shape)

# Data Pipeline

In [None]:
# One dataset element per padded name.
ids_dataset = tf.data.Dataset.from_tensor_slices(padded_id)

# Pipeline settings: shuffle buffer size, batch size and prefetch policy.
SHUFFLE = 60000
BATCH_SIZE = 64
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Length of each padded sequence (second axis of the padded id matrix).
seq_length = padded_id.shape[1]

def io_split(id_arr):
    """Split one sequence into an (input, target) pair shifted by one step.

    The input is the sequence without its last element and the target is the
    sequence without its first element, so the target at each position is the
    element that follows the input at that position.
    """
    return id_arr[:-1], id_arr[1:]
# Turn every name into an (input, target) pair, then shuffle, batch and prefetch.
ids_dataset = (
    ids_dataset
    .map(io_split)
    .shuffle(SHUFFLE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(AUTOTUNE)
)

ids_dataset

In [None]:
# Print the vocabulary held by the char_to_id lookup layer.
print(char_to_id.get_vocabulary())

# Preparing Model 

In [None]:
# Model hyperparameters: embedding vector size and number of GRU units.
embedding_dim = 256
rnn_units = 1024

class GenerateModel(tf.keras.Model):
    """Character-level model: Embedding -> GRU -> Dense logits over the vocabulary."""

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        """Create the three layers.

        Args:
            vocab_size: number of ids the embedding accepts and the dense layer emits.
            embedding_dim: size of each embedding vector.
            rnn_units: number of units in the GRU.
        """
        # The instance must not be forwarded as an extra positional argument:
        # zero-argument super() already binds it.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       return_sequences=True,
                                       return_state=True)
        # No activation: the layer outputs raw logits.
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the model on a batch of id sequences.

        Args:
            inputs: batch of integer id sequences fed to the embedding layer.
            states: optional GRU state to resume from; a fresh initial state is
                requested from the GRU when it is None.
            return_state: when True, also return the final GRU state.
            training: forwarded to every layer.

        Returns:
            The dense layer's output, or a ``(output, states)`` tuple when
            ``return_state`` is True.
        """
        x = self.embedding(inputs, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states
        return x
        
# Size the embedding and output layers from the lookup vocabulary.
model = GenerateModel(
    vocab_size=len(char_to_id.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
)

# Push a single batch through the untrained model and show the output shape.
for input_batch, target_batch in ids_dataset.take(1):
    batch_pred = model(input_batch)
    print(batch_pred.shape)

print("MODEL SUMMARY")
model.summary()

In [None]:
# Save the weights once per epoch under ./training_checkpoints/ckpt_<epoch>.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
)

# The model's dense layer has no activation, so the loss is computed from logits.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss)

# TRAINING

In [None]:
# Number of full passes over the dataset.
EPOCHS = 10

history = model.fit(
    ids_dataset,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback],
)

# GENERATING NAME

### The `OutputStep()` model below generates one letter per call.
* I will append each generated letter to the name string until the required name length is reached.

In [None]:
class OutputStep(tf.keras.Model):
    """Single-step sampler: feeds characters to a trained model and samples the next one."""

    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        """Store the model and lookup layers and pre-compute the sampling mask.

        Args:
            model: trained model called with ``inputs``, ``states`` and
                ``return_state=True``; must return ``(logits, states)``.
            chars_from_ids: lookup layer mapping ids back to characters.
            ids_from_chars: lookup layer mapping characters to ids.
            temperature: divisor applied to the logits before sampling.
        """
        super().__init__()
        self.temperature = temperature
        self.model = model
        # Use the lookup layers handed to the constructor rather than
        # whatever globals of the same role happen to exist in the notebook.
        self.id_to_char = chars_from_ids
        self.char_to_id = ids_from_chars

        # Ids that must never be sampled. Several of these tokens can resolve
        # to the same id (e.g. any token missing from the vocabulary maps to
        # the OOV id), and a sparse tensor with repeated indices is rejected
        # when converted to dense, so the ids are de-duplicated first.
        skip_ids, _ = tf.unique(self.char_to_id(['', '?', "#", "-", '[UNK]']))
        skip_ids = skip_ids[:, None]
        sparse_mask = tf.SparseTensor(
            # -inf at every skipped id, so its logit can never win.
            values=[-float('inf')] * len(skip_ids),
            indices=skip_ids,
            # One mask entry per vocabulary id.
            dense_shape=[len(self.char_to_id.get_vocabulary())])
        sparse_mask = tf.sparse.reorder(sparse_mask)
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)

    @tf.function
    def generate_one_step(self, inputs, states=None):
        """Sample the next character for each input string.

        Args:
            inputs: string tensor; each entry is split into UTF-8 characters.
            states: model state returned by the previous call, or None.

        Returns:
            ``(predicted_chars, states)``: the sampled characters and the
            model state to pass into the next call.
        """
        # Strings -> characters -> ids (ragged result densified for the model).
        input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
        input_ids = self.char_to_id(input_chars).to_tensor()

        predicted_logits, states = self.model(inputs=input_ids, states=states,
                                              return_state=True)

        # Keep only the logits of the last time step and apply the temperature.
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits / self.temperature

        # Adding the mask sets the logits of the skipped ids to -inf.
        predicted_logits = predicted_logits + self.prediction_mask

        # Draw one id per row from the resulting distribution.
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)

        predicted_chars = self.id_to_char(predicted_ids)

        return predicted_chars, states
    
# Wrap the trained model together with both lookup layers for step-wise sampling.
output_model = OutputStep(
    model,
    chars_from_ids=id_to_char,
    ids_from_chars=char_to_id,
)

# Final Anime Name

1. Set `req_len` and `start_letter` as input to get the final anime name as output.

In [None]:
# Required length of the generated name and the character(s) it starts with.
req_len = 15
start_letter = "A"

# No state yet: the first call asks the model for a fresh initial state.
states = None
next_char = tf.constant([start_letter])
result = [next_char]

# start_letter is already part of the name, so only the remaining characters
# are generated; otherwise the name ends up longer than req_len.
for _ in range(max(req_len - len(start_letter), 0)):
    # Feed the previous character and state back in to get the next character.
    next_char, states = output_model.generate_one_step(next_char, states=states)
    result.append(next_char)

# Concatenate the per-step characters and decode the single batch entry.
result = tf.strings.join(result)
print(result[0].numpy().decode('utf-8'))

## How is the generated anime name? 😁
### Hope you got some valuable learning from this notebook.
### Happy Coding❤.

[](http://)