In [100]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [101]:
girl_name = pd.read_csv("girl_name.csv")
boy_name = pd.read_csv("boy_name.csv")

In [102]:
girl_name.head()

Unnamed: 0,name
0,Olivia
1,Emma
2,Ava
3,Sophia
4,Isabella


In [68]:
boy_name.head()

Unnamed: 0,name
0,Liam
1,Noah
2,Oliver
3,Elijah
4,William


In [69]:
# Labeling girl ->0, boy -> 1
girl_name['gender'] = [0]*len(girl_name)
boy_name['gender'] = [1]*len(boy_name)

In [70]:
data = pd.concat([girl_name, boy_name])

In [103]:
class Name_Dataset(Dataset):
    def __init__(self, data):
        super().__init__()
        self.data = data.sample(frac=1).reset_index(drop=True)
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        a = self.data['name'].loc[idx].lower()
        n = np.array([np.eye(26)[ord(x)-0x61] for x in a])
        
        y = self.data['gender'].loc[idx]

        return torch.Tensor(n), torch.FloatTensor([y])

In [104]:
dataset = Name_Dataset(data)
dataloader = DataLoader(dataset=dataset,
                        batch_size=1,
                        shuffle=True,
                        num_workers=2)

In [105]:
class Model(nn.Module):
    def __init__(self):
        super().__init__() 
        self.rnn = nn.RNN(input_size=26, hidden_size=32, num_layers=1)
        self.linear = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        out = self.rnn(x)[0][:, -1, :] 
        return self.sigmoid(self.linear(out))

In [107]:
model = Model().cuda()
loss_func = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [108]:
total_epochs = 3
for epoch in range(total_epochs):
    success = 0
    loss_tot = []
    for X, y in dataloader:
        X = X.cuda()
        y_pred = model(X)
        y = y.cuda()
        loss = loss_func(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        y_pred[y_pred > 0.5] = 1
        y_pred[y_pred < 0.5] = 0
        success += (y_pred == y).sum()
        
        loss_tot.append(loss.item())
    
    if epoch % 1 == 0:
        print(f'Epoch: {epoch}, Train Loss: {np.array(loss_tot).mean():.3f}, Train Accuracy: {success/2000:.3f}')

Epoch: 0, Train Loss: 0.551, Train Accuracy: 0.739
Epoch: 1, Train Loss: 0.502, Train Accuracy: 0.765
Epoch: 2, Train Loss: 0.498, Train Accuracy: 0.764


In [109]:
def predict(name):
    a = name.lower()
    n = np.array([[np.eye(26)[ord(x)-0x61] for x in a]])
    X = torch.Tensor(n)
    y_pred = model(X.cuda())
    if y_pred < 0.5:
        print(f'{name} should be a girl!')
    else:
        print(f'{name} should be a boy!')

In [115]:
predict('Ron')

Ron should be a boy!
