In [13]:
from google.colab import files
uploaded = files.upload()


Saving names.txt to names (1).txt


In [23]:

import torch
from torch.utils.data import Dataset, DataLoader
import string

class NameDataset(Dataset):
    def __init__(self, filepath):
        with open(filepath, 'r') as file:
          self.names = file.read().splitlines()
        self.names = [self.clean_name(name) for name in self.names]
        self.chars = sorted(list(set(''.join(self.names))))
        self.char_to_index = {char: idx for idx, char in enumerate(self.chars)}
        self.index_to_char = {idx: char for idx, char in enumerate(self.chars)}
        self.vocab_size = len(self.chars)
    def clean_name(self, name):

        cleaned_name = ''.join(char for char in name if char not in string.punctuation and not char.isdigit())
        return cleaned_name

    def __len__(self):
        return len(self.names)

    def __getitem__(self, idx):
        name = self.names[idx]
        name_indices = [self.char_to_index[char] for char in name]
        return torch.tensor(name_indices, dtype=torch.long)

    def one_hot_encode(self, index):
        one_hot = torch.zeros(self.vocab_size)
        one_hot[index] = 1
        return one_hot

    def decode(self, indices):
        return ''.join([self.index_to_char[idx] for idx in indices])

dataset = NameDataset('names (1).txt')
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)


In [24]:
import torch.nn as nn

class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out[:, -1, :])
        return out, hidden

    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size)

model = RNNModel(input_size=dataset.vocab_size, hidden_size=128, output_size=dataset.vocab_size)


In [41]:
import torch.optim as optim
import torch.nn.functional as F

num_epochs = 10
learning_rate = 0.00000001


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


for epoch in range(num_epochs):
    for i, name_tensor in enumerate(dataloader):
        name_tensor = name_tensor.squeeze(0)
        hidden = model.init_hidden()

        loss = 0
        for t in range(name_tensor.size(0) - 1):
            input_char = dataset.one_hot_encode(name_tensor[t])
            target_char = name_tensor[t + 1]
            input_char = input_char.view(1, 1, -1)

            output, hidden = model(input_char, hidden)
            loss += criterion(output, target_char.view(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/10], Loss: 7.3737
Epoch [2/10], Loss: 13.0401
Epoch [3/10], Loss: 9.4209
Epoch [4/10], Loss: 8.0726
Epoch [5/10], Loss: 8.2736
Epoch [6/10], Loss: 15.5012
Epoch [7/10], Loss: 16.0516
Epoch [8/10], Loss: 5.6069
Epoch [9/10], Loss: 9.0011
Epoch [10/10], Loss: 7.7704


In [42]:
import numpy as np

def generate_name(model, start_char, dataset, max_length=20):
    model.eval()
    input_char = dataset.one_hot_encode(dataset.char_to_index[start_char]).view(1, 1, -1)
    hidden = model.init_hidden()
    name_generated = [start_char]

    for _ in range(max_length):
        output, hidden = model(input_char, hidden)
        output_dist = output.data.view(-1).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        char = dataset.index_to_char[top_i.item()]
        name_generated.append(char)
        if char == '\n':
            break
        input_char = dataset.one_hot_encode(top_i).view(1, 1, -1)

    return ''.join(name_generated)

import numpy as np

def generate_name(model, start_char, dataset, max_length=20):
    model.eval()
    if start_char not in dataset.char_to_index:
        raise ValueError(f"Start character '{start_char}' not found in dataset.")

    input_char = dataset.one_hot_encode(dataset.char_to_index[start_char]).view(1, 1, -1)
    hidden = model.init_hidden()
    name_generated = [start_char]

    for _ in range(max_length - 1):
        output, hidden = model(input_char, hidden)
        output_dist = output.data.view(-1).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        char = dataset.index_to_char[top_i.item()]

        if char == '\n':  # End of name
            break

        name_generated.append(char)
        input_char = dataset.one_hot_encode(top_i).view(1, 1, -1)

    return ''.join(name_generated)

print('please enter the first char of name: ')
start_char=input()
print('please enter max length of the name required: ')
max_length=int(input())
print('please enter how many names you require: ')
n=int(input())

print(f"starting char '{start_char}':" )
for i in range(n):
   generated_name = generate_name(model, start_char, dataset, max_length=5)
   print(f"{generated_name}")


please enter the first char of name: 
m
please enter max length of the name required: 
6
please enter how many names you require: 
100
starting char 'm':
mangu
mango
manhu
masom
misar
mamin
moham
minal
muril
maniu
moral
manti
mashu
mhirs
magra
munni
manir
madbu
maliy
mahsh
malan
mausk
mosis
mansm
mamro
marar
monty
mausu
mahno
mukna
muhke
manth
muden
malma
mohan
mudli
milus
manku
manty
mahfa
maish
miten
madit
mulal
mamst
muddr
manga
murni
mohes
mahsu
mango
mahju
meshn
mahar
mulus
muhdh
marwa
masib
mohav
mahin
manwi
munir
mardo
manen
mojme
mahpa
minne
masli
manku
mahvi
manha
mtara
mante
meyta
morch
maafu
mosam
mwetr
mahsi
mohan
moola
mandi
mainn
maeli
mohno
muhso
muman
manwa
mahur
meraj
menap
marib
matru
mailu
makit
magna
manuv
manga
mjeem
meera
