In [None]:
from pathlib import Path

import numpy as np
import torch
import torch.nn
from torch.utils.data import Dataset, DataLoader, random_split

In [None]:
class CustomDataset(Dataset):
    """Next-character prediction samples built from a fixed list of first names.

    Every name yields one sample ``(x, y)``: ``x`` stacks the encodings of all
    characters except the last one (float32), ``y`` stacks the encodings of all
    characters except the first one (int64), so row ``k`` of ``y`` is the
    character that follows row ``k`` of ``x``.

    Characters are encoded as vectors of width 52: positions 0-25 stand for
    ``a``-``z`` and positions 26-51 for ``A``-``Z``.
    """

    def __init__(self):
        """Encode all names once and keep the resulting tensor pairs in ``self.data``."""
        names = [
            "James", "Mary", "Robert", "Patricia", "John", "Jennifer", "Michael", "Linda",
            "David", "Elizabeth", "William", "Barbara", "Richard", "Susan", "Joseph", "Jessica",
            "Thomas", "Sarah", "Christopher", "Karen", "Charles", "Lisa", "Daniel", "Nancy",
            "Matthew", "Betty", "Anthony", "Sandra", "Mark", "Margaret", "Donald", "Ashley",
            "Steven", "Kimberly", "Andrew", "Emily", "Paul", "Donna", "Joshua", "Michelle",
            "Kenneth", "Carol", "Kevin", "Amanda", "Brian", "Melissa", "George", "Deborah",
            "Timothy", "Stephanie", "Ronald", "Dorothy", "Jason", "Rebecca", "Edward", "Sharon",
            "Jeffrey", "Laura", "Ryan", "Cynthia", "Jacob", "Amy", "Gary", "Kathleen",
            "Nicholas", "Angela", "Eric", "Shirley", "Jonathan", "Brenda", "Stephen", "Emma",
            "Larry", "Anna", "Justin", "Pamela", "Scott", "Nicole", "Brandon", "Samantha",
            "Benjamin", "Katherine", "Samuel", "Christine", "Gregory", "Helen", "Alexander", "Debra",
            "Patrick", "Rachel", "Frank", "Carolyn", "Raymond", "Janet", "Jack", "Maria",
            "Dennis", "Catherine", "Jerry", "Heather", "Tyler", "Diane", "Aaron", "Olivia",
            "Jose", "Julie", "Adam", "Joyce", "Nathan", "Victoria", "Henry", "Ruth",
            "Zachary", "Virginia", "Douglas", "Lauren", "Peter", "Kelly", "Kyle", "Christina",
            "Noah", "Joan", "Ethan", "Evelyn", "Jeremy", "Judith", "Walter", "Andrea",
            "Christian", "Hannah", "Keith", "Megan", "Roger", "Cheryl", "Terry", "Jacqueline",
            "Austin", "Martha", "Sean", "Madison", "Gerald", "Teresa", "Carl", "Gloria",
            "Harold", "Sara", "Dylan", "Janice", "Arthur", "Ann", "Lawrence", "Kathryn",
            "Jordan", "Abigail", "Jesse", "Sophia", "Bryan", "Frances", "Billy", "Jean",
            "Bruce", "Alice", "Gabriel", "Judy", "Joe", "Isabella", "Logan", "Julia",
            "Alan", "Grace", "Juan", "Amber", "Albert", "Denise", "Willie", "Danielle",
            "Elijah", "Marilyn", "Wayne", "Beverly", "Randy", "Charlotte", "Vincent", "Natalie",
            "Mason", "Theresa", "Roy", "Diana", "Ralph", "Brittany", "Bobby", "Doris",
            "Russell", "Kayla", "Bradley", "Alexis", "Philip", "Lori", "Eugene", "Marie",
        ]
        # Inputs drop the last character, targets drop the first one, so both
        # stacks have the same number of rows.
        self.data = [
            (
                torch.stack([CustomDataset.make_tensor(letter) for letter in word[:-1]]),
                torch.stack(
                    [CustomDataset.make_tensor(letter, dtype=torch.long) for letter in word[1:]]
                ),
            )
            for word in names
        ]

    @staticmethod
    def make_tensor(ch, dtype=torch.float32):
        """Encode a single character as a length-52 vector.

        Args:
            ch: A one-character string.
            dtype: dtype of the returned tensor.

        Returns:
            A tensor with a 1 at the character's position; any character
            outside ``a``-``z`` / ``A``-``Z`` gives an all-zero tensor.
        """
        code = ord(ch)
        encoded = torch.zeros(52, dtype=dtype)
        if ord('a') <= code <= ord('z'):
            encoded[code - ord('a')] = 1
        if ord('A') <= code <= ord('Z'):
            encoded[26 + code - ord('A')] = 1
        return encoded

    @staticmethod
    def from_tensor(tensor):
        """Return the character whose position holds the largest value in ``tensor``."""
        _, top_positions = torch.topk(tensor, k=1)
        position = top_positions[0].item()
        if position >= 26:
            return chr(ord('A') + position - 26)
        return chr(ord('a') + position)

    def __len__(self):
        """Number of encoded names."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` tensor pair stored at position ``idx``."""
        return self.data[idx]

# Hold out a fixed 20% of the names for evaluation. Building two separate
# CustomDataset() objects would give train and test the very same samples, so
# the "test" loss would only re-measure the training data.
names_dataset = CustomDataset()
test_count = len(names_dataset) // 5
train_dataset, test_dataset = random_split(
    names_dataset,
    [len(names_dataset) - test_count, test_count],
    # Seeded generator: the same names land in each split on every run.
    generator=torch.Generator().manual_seed(0),
)
# batch_size=1 because names differ in length and the default collate
# function cannot stack tensors of different shapes into one batch.
train = DataLoader(train_dataset, batch_size=1)
test = DataLoader(test_dataset, batch_size=1)

In [None]:
class RNNetwork(torch.nn.Module):
    """Recurrent network with an explicit state vector.

    At every step the input is concatenated with the previous state. From that
    combined vector the network computes
      * the next state, through a single linear layer (``i2s``), and
      * output log-probabilities, through two ReLU-activated linear layers
        (``i2h``, ``h2h``), an output layer (``h2o``), dropout and log-softmax.

    Inputs may be a single vector of shape ``(input_size,)`` or a batch of
    shape ``(batch, input_size)``; the state must have the matching leading
    shape.

    Args:
        input_size: Number of features in one input vector.
        output_size: Number of output classes.
        hidden_size: Width of the two hidden layers.
        state_size: Length of the recurrent state vector.
    """

    def __init__(self, input_size, output_size, hidden_size, state_size):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.state_size = state_size

        self.i2s = torch.nn.Linear(self.input_size + self.state_size, self.state_size)
        self.i2h = torch.nn.Linear(self.input_size + self.state_size, self.hidden_size)
        self.h2h = torch.nn.Linear(self.hidden_size, self.hidden_size)
        self.h2o = torch.nn.Linear(self.hidden_size, self.output_size)
        self.dropout = torch.nn.Dropout(0.15)
        # Normalise over the class (last) dimension. For 1-D input this is the
        # same as dim=0, and it stays correct when a batch dimension is present.
        self.softmax = torch.nn.LogSoftmax(dim=-1)

    def forward(self, i: torch.Tensor, state: torch.Tensor):
        """Run one time step.

        Args:
            i: Input of shape ``(input_size,)`` or ``(batch, input_size)``.
            state: Previous state of shape ``(state_size,)`` or
                ``(batch, state_size)``.

        Returns:
            Tuple ``(log_probs, next_state)``.
        """
        # Join along the feature axis so 1-D and batched inputs both work.
        combined = torch.cat((i, state), dim=-1)
        next_state = self.i2s(combined)
        hidden = self.i2h(combined)
        hidden = self.h2h(torch.relu(hidden))
        out = self.h2o(torch.relu(hidden))
        out = self.dropout(out)
        return self.softmax(out), next_state

    def init_hidden(self, batch_size=None):
        """Return an all-zero initial state.

        The state is created on the same device and with the same dtype as the
        model's weights, so it can be fed to ``forward`` after the model has
        been moved with ``.to(device)``.

        Args:
            batch_size: If given, return a state of shape
                ``(batch_size, state_size)``; otherwise ``(state_size,)``.
        """
        reference = self.i2s.weight
        shape = (self.state_size,) if batch_size is None else (batch_size, self.state_size)
        return torch.zeros(shape, device=reference.device, dtype=reference.dtype)

In [None]:
def train_loop(dataloader, model, loss, optimizer, epoch, accumulation_steps=10):
    """Run one training pass over ``dataloader`` with gradient accumulation.

    Each sequence is fed to the model one step at a time, starting from
    ``model.init_hidden()``; the per-step losses of a batch are summed and
    back-propagated. The optimizer steps once every ``accumulation_steps``
    batches, and once more at the end if a partial window is left over.

    Args:
        dataloader: Iterable of ``(batched_x, batched_y)`` 3-D tensors indexed
            as (batch, step, feature); the target class of a step is the
            argmax of its ``batched_y`` row.
        model: Module called as ``model(x_step, state) -> (pred, state)`` and
            providing ``init_hidden()``.
        loss: Callable ``loss(pred, target_index)`` returning a scalar tensor.
        optimizer: Optimizer over ``model``'s parameters.
        epoch: Current epoch index (not used by this function).
        accumulation_steps: Number of batches whose gradients are accumulated
            before each optimizer step.

    Returns:
        Sum over batches of the normalised batch loss, as a Python number.
    """
    model.train()
    # Inputs and state must live where the weights are; otherwise a model
    # moved to cuda/mps fails on the first forward call.
    device = next(model.parameters()).device
    total_loss = 0
    pending_batches = 0

    for batch_idx, (batched_x, batched_y) in enumerate(dataloader):
        batched_x = batched_x.to(device)
        batched_y = batched_y.to(device)
        cost = 0
        for x, y in zip(batched_x, batched_y):
            state = model.init_hidden().to(device)
            for x_, y_ in zip(x, y):
                pred, state = model(x_, state)
                cost += loss(pred, torch.argmax(y_))
        cost.backward()
        pending_batches += 1
        if (batch_idx + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
            pending_batches = 0
        # NOTE(review): the denominator also includes the feature width
        # (size(2)), so this is smaller than the mean per-step loss by that
        # factor. Kept as is because test_loop normalises the same way.
        batch_loss = cost.item() / (batched_x.size(0) * batched_x.size(1) * batched_x.size(2))
        total_loss += batch_loss

    # Apply gradients of a trailing partial window; without this they would
    # never be used in this pass and would leak into the next one.
    if pending_batches:
        optimizer.step()
        optimizer.zero_grad()

    return total_loss


def test_loop(dataloader, model, loss, epoch):
    model.eval()
    test_loss= 0
    with torch.no_grad():
        for batch_idx, (batched_x, batched_y) in enumerate(dataloader):
            batch_loss = 0
            for x, y in zip(batched_x, batched_y):
                state = model.init_hidden()
                for x_, y_ in zip(x, y):
                    pred, state = model(x_, state)
                    batch_loss += loss(pred, torch.argmax(y_))
            batch_loss /= batched_x.size(0) * batched_x.size(1) * batched_x.size(2)
            test_loss += batch_loss
    return test_loss.item()


In [None]:
# Width of the hidden layers; passed to RNNetwork as ``hidden_size``.
HIDDEN_SIZE = 30
# Length of the recurrent state vector; passed to RNNetwork as ``state_size``.
STATE_SIZE = 20

def main():
    """Train an RNNetwork on the module-level ``train`` loader and save its weights.

    Picks cuda, then mps, then cpu; trains for a fixed number of epochs with
    SGD and NLLLoss, printing the training and test loss after every epoch,
    and finally writes the model's ``state_dict`` to ``../out/rnn_names.pth``.
    """
    device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps" if torch.backends.mps.is_available() else "cpu"
    )
    print(f"Using {device} device")

    # Input and output are both the 52-wide letter encoding (26 lower + 26 upper).
    model = RNNetwork(26 * 2, 26 * 2, HIDDEN_SIZE, STATE_SIZE).to(device)
    loss = torch.nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.005)
    epochs = 50

    # Create the output directory before training: torch.save does not create
    # missing parent directories, and failing only after the last epoch would
    # throw the trained weights away.
    checkpoint_path = Path("../out/rnn_names.pth")
    checkpoint_path.parent.mkdir(parents=True, exist_ok=True)

    for epoch in range(epochs):
        train_loss = train_loop(train, model, loss, optimizer, epoch)
        test_loss = test_loop(test, model, loss, epoch)

        print("---------------")
        print(f"Epoch: {epoch+1}")
        print("Loss in training: ", train_loss)
        print("Loss in test:", test_loss)
        print("---------------\n")

    torch.save(model.state_dict(), checkpoint_path)

# Run the full training as soon as this cell is executed.
main()

In [None]:
def sandbox():
    """Interactively query the saved model for the next character of a name.

    Loads the weights from ``../out/rnn_names.pth``, then repeatedly reads a
    name prefix from standard input, feeds it through the network character by
    character and prints the five most probable next characters followed by
    the prefix extended with the single most probable one. An empty line,
    Ctrl-C or end of input ends the loop.
    """
    device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps" if torch.backends.mps.is_available() else "cpu"
    )
    print(f"Using {device} device")
    model = RNNetwork(26 * 2, 26 * 2, HIDDEN_SIZE, STATE_SIZE).to(device)
    # map_location lets a checkpoint written on one device (e.g. cuda) be
    # loaded on a machine that only has another (e.g. cpu).
    model.load_state_dict(torch.load("../out/rnn_names.pth", map_location=device))
    model.eval()

    with torch.no_grad():
        while True:
            try:
                name = input("Enter name: ").strip()
            except (KeyboardInterrupt, EOFError):
                # EOFError: stdin was closed or is not interactive.
                break

            if name == "":
                break

            # Inputs and state must be on the same device as the weights.
            name_tensors = [CustomDataset.make_tensor(ch).to(device) for ch in name]

            state = model.init_hidden().to(device)
            pred = None
            for tensor in name_tensors:
                pred, state = model(tensor, state)

            # pred holds log-probabilities; exponentiate and renormalise.
            probs = torch.exp(pred)
            probs = probs / torch.sum(probs)
            for top_prob, idx in zip(*torch.topk(probs, k=5)):
                ch = idx.item()
                c = chr(ord('A') + ch - 26) if ch >= 26 else chr(ord('a') + ch)
                print(f"{c}: {top_prob.item():.2f}")
            print("\nExpected: ", name + CustomDataset.from_tensor(pred))
# Start the interactive prompt; it blocks until an empty name is entered.
sandbox()