<a href="https://colab.research.google.com/github/Rominaarab/Big-Data/blob/main/Big_data_generate_a_name.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import random

In [3]:
# Training corpus: the example names the character model is fitted on.
names = [
    "Ali",
    "Sara",
    "Maryam",
    "Alice",
    "Charlie",
    "Bob",
    "David",
]

In [17]:
# Character vocabulary: every distinct lower-cased character found in `names`,
# sorted so that the index assigned to each character is deterministic.
all_char = {ch for name in names for ch in name.lower()}
chars = sorted(all_char)

# Two-way lookup between characters and their integer ids.
char_to_idx = {ch: idx for idx, ch in enumerate(chars)}
idx_to_char = dict(enumerate(chars))

In [18]:
# Model and optimiser hyperparameters.
vocab_size = len(chars)   # one embedding row / output class per character
embedding_dim = 10        # size of each character embedding vector
hidden_dim = 128          # hidden size handed to the GRU
learning_rate = 5e-3      # step size passed to the Adam optimiser

In [19]:
class NameGeneratorRNN(nn.Module):
    """Character-level sequence model: embedding -> GRU -> linear projection.

    Args:
        vocab_size: Number of distinct characters; used both as the number of
            embedding rows and as the width of the output layer.
        embedding_dim: Size of each character embedding.
        hidden_dim: Hidden size of the GRU.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embedding_dim
        )
        # batch_first=True: inputs and outputs are laid out (batch, seq, feature).
        self.rnn = nn.GRU(
            input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x, hidden=None):
        """Score the next character at every position of ``x``.

        Args:
            x: Integer tensor of character indices, laid out (batch, seq).
            hidden: Optional GRU hidden state from a previous call; ``None``
                lets the GRU start from its default initial state.

        Returns:
            A ``(logits, hidden)`` pair: unnormalised scores of shape
            (batch, seq, vocab_size) and the GRU's final hidden state.
        """
        char_vectors = self.embedding(x)
        rnn_out, next_hidden = self.rnn(char_vectors, hidden)
        return self.fc(rnn_out), next_hidden

In [20]:
# Seed the random number generators this notebook draws from, so that weight
# initialisation, the random choice of training names and the multinomial
# sampling in generation repeat on a fresh top-to-bottom run.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Network, optimiser and loss used by the training cell.
model = NameGeneratorRNN(vocab_size, embedding_dim, hidden_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# CrossEntropyLoss takes raw logits and integer class indices.
criterion = nn.CrossEntropyLoss()

In [22]:
# generate_name() puts the shared model into eval mode; switch back to
# training mode explicitly in case this cell is re-run after sampling.
model.train()

# Lower-case once, outside the loop. A name needs at least two characters to
# form an (input, target) pair; shorter ones would produce empty input and
# target sequences, so they are left out.
training_names = [lowered for lowered in (n.lower() for n in names) if len(lowered) >= 2]

# Each iteration is a single optimisation step on one randomly chosen name.
for epoch in range(1000):
    name = random.choice(training_names)

    # Next-character prediction: the input is the name without its last
    # character, the target is the same name shifted left by one position.
    input_seq = [char_to_idx[ch] for ch in name[:-1]]
    target_seq = [char_to_idx[ch] for ch in name[1:]]
    input_tensor = torch.tensor([input_seq], dtype=torch.long)
    target_tensor = torch.tensor([target_seq], dtype=torch.long)

    optimizer.zero_grad()
    output, _ = model(input_tensor)
    # Drop the size-1 batch dimension so the loss sees one row of logits
    # per character position and one target index per position.
    loss = criterion(output.squeeze(0), target_tensor.squeeze(0))
    loss.backward()
    optimizer.step()

    # The reported loss is that of the single name used in this step.
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")

Epoch 0, Loss: 2.7362160682678223
Epoch 100, Loss: 0.043751683086156845
Epoch 200, Loss: 0.0033779109362512827
Epoch 300, Loss: 0.005941391456872225
Epoch 400, Loss: 0.0016645787982270122
Epoch 500, Loss: 0.0009521010797470808
Epoch 600, Loss: 0.0008307262905873358
Epoch 700, Loss: 0.0006189793348312378
Epoch 800, Loss: 0.0003831733774859458
Epoch 900, Loss: 0.00019678131502587348


In [24]:
def generate_name(seed_char='a', max_length=10):
    """Sample a name from the trained model, one character at a time.

    Starting from ``seed_char``, the model's scores for the latest character
    are turned into probabilities with softmax and the next character is
    drawn from them with ``torch.multinomial``. The hidden state returned by
    the model is fed back in on the following step.

    Args:
        seed_char: First character of the name. It is lower-cased before the
            lookup because the vocabulary is built from lower-cased names.
        max_length: Maximum number of characters in the result, seed
            included. The seed is always kept, so values below 1 still
            return the seed character on its own.

    Returns:
        The generated string, beginning with the lower-cased seed character.

    Raises:
        KeyError: If the lower-cased ``seed_char`` is not in ``char_to_idx``.

    Side effects:
        Leaves the global ``model`` in eval mode.
    """
    seed_char = seed_char.lower()
    if seed_char not in char_to_idx:
        raise KeyError(
            f"seed_char {seed_char!r} is not in the vocabulary {sorted(char_to_idx)}"
        )

    model.eval()
    result = seed_char
    next_char_idx = char_to_idx[seed_char]
    hidden = None

    # Inference only: without no_grad the carried hidden state would keep an
    # ever-growing autograd graph alive across steps.
    with torch.no_grad():
        # Check the length *before* sampling so the result never exceeds
        # max_length (previously max_length=1 returned two characters).
        while len(result) < max_length:
            input_char = torch.tensor([[next_char_idx]], dtype=torch.long)
            output, hidden = model(input_char, hidden)
            # output is (batch=1, seq=1, vocab); take the scores of the last step.
            probabilities = torch.softmax(output[0, -1], dim=0)
            next_char_idx = torch.multinomial(probabilities, 1).item()
            next_char = idx_to_char[next_char_idx]
            result += next_char
            # A space is treated as an end-of-name marker. NOTE(review): the
            # vocabulary is built from `names`; if none of them contains a
            # space this branch cannot fire and generation always runs to
            # max_length.
            if next_char == ' ':
                break
    return result

In [25]:
# Sample one name that starts with the letter "a" and print it.
print(generate_name('a'))

aliceeeebb
