In [69]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.utils.data import Dataset
import torch.optim as optim

In [70]:
words = open('names.txt','r').read().splitlines()

list

In [71]:
def add_periods(strings):
    return [f".{s}" for s in strings]

words = add_periods(words)

In [72]:
combined_string = "".join(words)
unique_characters = set(combined_string)
sorted_unique_characters = sorted(unique_characters)
print(sorted_unique_characters)
print(combined_string[1:10])

['.', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
emma.oliv


In [74]:


class CustomDataset(Dataset):
    def __init__(self, concatenated_str, char_list):
        self.char_list = char_list
        self.feature_tensor = self.create_feature_tensor(concatenated_str, char_list)
        self.index_tensor = self.create_index_tensor(concatenated_str, char_list)

    def create_feature_tensor(self, concatenated_str, char_list):
        feature_tensor = torch.zeros((len(concatenated_str), len(char_list)))
        for i, char in enumerate(concatenated_str):
            index = char_list.index(char)
            feature_tensor[i, index] = 1
        return feature_tensor

    def create_index_tensor(self, concatenated_str, char_list):
        index_tensor = torch.zeros(len(concatenated_str), dtype=torch.long)
        for i, char in enumerate(concatenated_str):
            index = char_list.index(char)
            index_tensor[i] = index

        index_tensor = torch.cat((index_tensor[1:], index_tensor[:1]))
        return index_tensor

    def __len__(self):
        return len(self.index_tensor)

    def __getitem__(self, idx):
        return self.feature_tensor[idx], self.index_tensor[idx]


# Create custom dataset
custom_dataset = CustomDataset(combined_string, sorted_unique_characters)

# Create DataLoader
batch_size = 114073
train_dataloader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)


tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor([ 5, 13, 13,  ..., 26, 24,  0])


In [75]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


In [76]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(27, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, 140),
            nn.ReLU(),
            nn.Linear(140, 27),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits


In [77]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=27, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=256, bias=True)
    (7): ReLU()
    (8): Linear(in_features=256, out_features=256, bias=True)
    (9): ReLU()
    (10): Linear(in_features=256, out_features=256, bias=True)
    (11): ReLU()
    (12): Linear(in_features=256, out_features=140, bias=True)
    (13): ReLU()
    (14): Linear(in_features=140, out_features=27, bias=True)
  )
)


In [78]:

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

# Train the model
epochs = 6
for epoch in range(epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
print("Done!")

Epoch 1
-------------------------------
loss: 3.305241  [114073/228146]
Epoch 2
-------------------------------
loss: 3.304935  [114073/228146]
Epoch 3
-------------------------------
loss: 3.304895  [114073/228146]
Epoch 4
-------------------------------
loss: 3.304738  [114073/228146]
Epoch 5
-------------------------------
loss: 3.304768  [114073/228146]
Epoch 6
-------------------------------
loss: 3.304602  [114073/228146]
Done!


In [80]:
def generate_words(start_char, model, char_list, max_length=20):
    with torch.no_grad():
        model.eval()
        input_tensor = torch.zeros(1, len(char_list)).to(device)
        start_index = char_list.index(start_char)
        input_tensor[0, start_index] = 1

        generated_words = []
        current_char = start_char
        for _ in range(max_length):
            output = model(input_tensor)
            _, predicted_index = torch.max(output, dim=1)
            predicted_char = char_list[predicted_index.item()]

            generated_words.append(predicted_char)
            print(f'input tensor {input_tensor}')
            

            # Update input tensor for the next iteration
            input_tensor.zero_()
            input_tensor[0, predicted_index.item()] = 1
            

            # Debug prints
            print(f"Current Char: {current_char}, Predicted Char: {predicted_char}")
            print(f"Output logits: {output.squeeze().cpu().numpy()}")  # Convert to numpy for better readability
            if predicted_char == '.':
                break


        generated_word = ''.join(generated_words[:-1])  # Exclude the last '.'
        print(f"Generated sequence starting with '{start_char}': {generated_word}")

    return generated_word

# Example usage:
start_char = '.'  # Change start character here
generated_word = generate_words(start_char, model, sorted_unique_characters)
print(f"Generated word starting with '{start_char}': {generated_word}")


input tensor tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1., 0., 0., 0.]])
Current Char: w, Predicted Char: w
Output logits: [-0.05942978 -0.0369483   0.00131079  0.02187892  0.03135263  0.06781182
  0.03764792 -0.01941245 -0.0453702   0.07288419 -0.00994769 -0.07041042
 -0.05054032  0.06567802 -0.03753499 -0.09150653 -0.0535771   0.07674101
  0.05886476 -0.07239634  0.01739429  0.05564881 -0.05542457  0.09783038
 -0.07530768 -0.05955213 -0.03390758]
input tensor tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1., 0., 0., 0.]])
Current Char: w, Predicted Char: w
Output logits: [-0.05942978 -0.0369483   0.00131079  0.02187892  0.03135263  0.06781182
  0.03764792 -0.01941245 -0.0453702   0.07288419 -0.00994769 -0.07041042
 -0.05054032  0.06567802 -0.03753499 -0.09150653 -0.0535771   0.07674101
  0.05886476 -0.07239634  0.01739429  0.05564881 -0.05542457  0.09783