In [43]:
# Colab-only step: prompts for a local file and stores it in the runtime.
# NOTE(review): the recorded output of this cell shows the upload was saved as
# "Indian_Names (1).csv" (presumably because an earlier copy already existed),
# while the loading cell reads "Indian_Names.csv" — confirm that the older copy
# is the one that should be used.
from google.colab import files
uploaded = files.upload()

Saving Indian_Names.csv to Indian_Names (1).csv


In [60]:
import torch
from torch.utils.data import Dataset, DataLoader
import string

# Load one name per line from the uploaded CSV.
# NOTE(review): the upload cell's recorded output shows the latest upload was
# saved as "Indian_Names (1).csv"; this path reads the earlier copy — confirm.
# 'utf-8-sig' decodes UTF-8 and additionally drops a leading byte-order mark,
# which would otherwise end up glued to the first name.
with open('/content/Indian_Names.csv', 'r', encoding='utf-8-sig') as file:
    names = [line.strip() for line in file.read().splitlines()]

# Drop blank lines so that no empty name reaches the dataset.
names = [name for name in names if name]

# Vocabulary: the 52 ASCII letters (lowercase first, then uppercase).
all_chars = string.ascii_letters
n_chars = len(all_chars)

# Position -> character, and its inverse character -> position.
index_to_char = dict(enumerate(all_chars))
char_to_index = {letter: position for position, letter in index_to_char.items()}

# Custom Dataset class
class NamesDataset(Dataset):
    """Dataset of names, each exposed as a 1-D LongTensor of character indices.

    Characters are looked up in the module-level ``char_to_index`` mapping; a
    character missing from that mapping raises ``KeyError``.
    """

    def __init__(self, names):
        """Keep a reference to ``names``, an indexable sequence of strings."""
        self.names = names

    def __len__(self):
        """Return the number of names."""
        return len(self.names)

    def __getitem__(self, idx):
        """Return the name at ``idx`` as a LongTensor of shape ``(len(name),)``.

        A tensor (not a Python list) is returned on purpose: DataLoader's
        default collate function transposes a list of ints into one tensor per
        character, whereas a tensor sample is stacked into a single
        ``(batch, len(name))`` tensor.
        """
        name = self.names[idx]
        return torch.tensor([char_to_index[char] for char in name], dtype=torch.long)

# Wrap the loaded names; each batch carries exactly one name, in shuffled order.
dataset = NamesDataset(names)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=1)


In [61]:
def one_hot_encode(index, n_categories):
    """Return a 1-D float tensor of length ``n_categories`` with a 1 at ``index``.

    All other entries are 0. ``index`` is used directly as a tensor subscript.
    """
    encoding = torch.zeros(n_categories)
    encoding[index] = 1
    return encoding

def decode(tensor):
    """Turn per-row scores into a string.

    For every row of ``tensor`` the position of the largest value along dim 1
    is looked up in the module-level ``index_to_char`` mapping, and the
    resulting characters are concatenated in row order.
    """
    best_positions = tensor.argmax(dim=1)
    return ''.join(index_to_char[position.item()] for position in best_positions)


In [62]:
import torch.nn as nn

class RNN(nn.Module):
    """Single-step recurrent cell built from two linear layers.

    Both layers read the concatenation of the current input and the previous
    hidden state: ``i2h`` produces the next hidden state and ``i2o`` produces
    the output scores, which are passed through a log-softmax over dim 1.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        joint_size = input_size + hidden_size
        self.i2h = nn.Linear(joint_size, hidden_size)
        self.i2o = nn.Linear(joint_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one step; return ``(log_probabilities, next_hidden)``.

        ``input`` and ``hidden`` are concatenated along dim 1. The next hidden
        state is the raw linear output (no non-linearity is applied to it).
        """
        combined = torch.cat([input, hidden], dim=1)
        next_hidden = self.i2h(combined)
        log_probs = self.softmax(self.i2o(combined))
        return log_probs, next_hidden

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape ``(1, hidden_size)``."""
        return torch.zeros((1, self.hidden_size))

# Width of the hidden state; input and output sizes both equal the vocabulary size.
n_hidden = 128
rnn = RNN(input_size=n_chars, hidden_size=n_hidden, output_size=n_chars)


In [65]:
# Training loop, disabled by wrapping it in a bare triple-quoted string: the
# cell only evaluates the string, so nothing below runs and no cell shown here
# updates the weights of `rnn`.
# NOTE(review) before re-enabling:
#   * there is no `loss.backward()` between accumulating `loss` and calling
#     `optimizer.step()`, so no gradients are computed for the step;
#   * `RNN.forward` already returns LogSoftmax output and nn.CrossEntropyLoss
#     applies log-softmax again — nn.NLLLoss is the direct match;
#   * `name_tensor[0]` followed by per-element indexing expects every batch to
#     be a (1, name_length) tensor of character indices — confirm the dataset
#     yields that;
#   * the loss printed every 100 epochs comes from the last name of the epoch only.
'''
criterion = nn.CrossEntropyLoss()
learning_rate = 0.005
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

n_epochs = 1000
for epoch in range(1, n_epochs + 1):
    for name_tensor in dataloader:
        name_tensor = name_tensor[0]
        hidden = rnn.init_hidden()

        rnn.zero_grad()
        loss = 0

        for i in range(len(name_tensor) - 1):
            input_char = one_hot_encode(name_tensor[i], n_chars).unsqueeze(0)
            target_char = name_tensor[i + 1].unsqueeze(0)

            output, hidden = rnn(input_char, hidden)
            loss += criterion(output, target_char)



        optimizer.step()

    if epoch % 100 == 0:
        print(f'Epoch {epoch} loss: {loss.item() / len(name_tensor)}')
'''

"\ncriterion = nn.CrossEntropyLoss()\nlearning_rate = 0.005\noptimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)\n\nn_epochs = 1000\nfor epoch in range(1, n_epochs + 1):\n    for name_tensor in dataloader:\n        name_tensor = name_tensor[0]\n        hidden = rnn.init_hidden()\n\n        rnn.zero_grad()\n        loss = 0\n\n        for i in range(len(name_tensor) - 1):\n            input_char = one_hot_encode(name_tensor[i], n_chars).unsqueeze(0)\n            target_char = name_tensor[i + 1].unsqueeze(0)\n\n            output, hidden = rnn(input_char, hidden)\n            loss += criterion(output, target_char)\n\n\n\n        optimizer.step()\n\n    if epoch % 100 == 0:\n        print(f'Epoch {epoch} loss: {loss.item() / len(name_tensor)}')\n"

In [63]:
import random

def generate_name(start_letter='A', max_length=50):
    """Greedily generate a name with the module-level ``rnn``.

    Starting from ``start_letter``, the most likely next character is appended
    at every step and fed back in as the next input.

    Args:
        start_letter: First character of the name; must be a key of the
            module-level ``char_to_index`` (otherwise ``KeyError`` is raised).
        max_length: Maximum number of characters generated after
            ``start_letter``.

    Returns:
        The generated name as a ``str``. A string is always returned; the
        previous ``not in string.digits`` guard could fall through to an
        implicit ``None``, which callers that slice the result cannot handle.
    """
    with torch.no_grad():
        # Named `current` rather than `input` to avoid shadowing the builtin.
        current = one_hot_encode(char_to_index[start_letter], n_chars).unsqueeze(0)
        hidden = rnn.init_hidden()
        generated = [start_letter]

        for _ in range(max_length):
            output, hidden = rnn(current, hidden)
            _, top_index = output.topk(1)
            char_index = top_index[0][0].item()
            next_char = index_to_char[char_index]

            # '\n' acts as the end-of-name marker; this only triggers when the
            # vocabulary actually contains it.
            if next_char == '\n':
                break

            generated.append(next_char)
            current = one_hot_encode(char_index, n_chars).unsqueeze(0)

    return ''.join(generated)

# Show one truncated sample per starting letter: (letter, characters to keep).
for first_letter, keep in (('A', 7), ('S', 6), ('R', 5)):
    print(generate_name(first_letter)[:keep])


ACCCCCC
SOECCC
ROACC
