In [3]:
import csv
import gzip
import time

import torch
from torch.utils.data import DataLoader, Dataset

In [5]:
# Hyperparameters and run-time switches, grouped by what they configure.

# -- model --
N_CHARS = 128      # first argument given to RNNClassifier; presumably the ASCII code range -- TODO confirm
HIDDEN_SIZE = 100  # second argument given to RNNClassifier
N_LAYER = 2        # presumably the number of recurrent layers -- TODO confirm where it is consumed

# -- training loop --
BATCH_SIZE = 256   # batch size used by both the train and the test DataLoader
N_EPOCHS = 100     # number of train-then-test passes run by the training cell

# -- hardware --
USE_GPU = False    # when True, the training cell moves the classifier to 'cuda:0'

In [4]:
class NameDataset(Dataset):
    """Dataset of ``(name, country_index)`` samples read from a gzipped CSV file.

    Every non-blank CSV row must hold the name in column 0 and the country in
    column 1. The label of a sample is the position of its country in the
    alphabetically sorted list of distinct countries found in the file.

    Note that each instance derives the label mapping from its own file, so the
    train and test instances assign the same index to a country only when both
    files contain the same set of countries.
    """

    def __init__(self, is_train_set=True):
        """Load the train split (default) or the test split from ``data/``.

        Args:
            is_train_set: ``True`` reads ``data/names_train.csv.gz``,
                ``False`` reads ``data/names_test.csv.gz``.
        """
        filename = 'data/names_train.csv.gz' if is_train_set else 'data/names_test.csv.gz'
        # newline='' lets the csv module do its own line-ending handling,
        # as recommended by the csv documentation.
        with gzip.open(filename, 'rt', newline='') as f:
            # csv.reader yields [] for blank lines; skip them so that the
            # column lookups below cannot fail on an empty row.
            rows = [row for row in csv.reader(f) if row]
        self.names = [row[0] for row in rows]
        self.countries = [row[1] for row in rows]
        self.len = len(self.names)
        self.country_list = sorted(set(self.countries))
        self.country_dict = self.getCountryDict()
        self.country_num = len(self.country_list)

    def __getitem__(self, index):
        """Return ``(name, country_index)`` for the sample at ``index``."""
        # The label must come from the per-sample country column
        # (self.countries), not from the de-duplicated country_list, which is
        # much shorter and unrelated to the sample order.
        return self.names[index], self.country_dict[self.countries[index]]

    def __len__(self):
        """Return the number of samples loaded from the file."""
        return self.len

    def getCountryDict(self):
        """Return a dict mapping each country name to its index in ``country_list``."""
        return {country_name: idx for idx, country_name in enumerate(self.country_list)}

    def idx2country(self, index):
        """Return the country name stored at position ``index`` of ``country_list``."""
        return self.country_list[index]

    def getCountriesNum(self):
        """Return the number of distinct countries in this dataset."""
        return self.country_num

In [6]:
# Training split; shuffle=True asks the DataLoader to reshuffle the samples every epoch.
trainset = NameDataset(is_train_set=True)
trainloader = DataLoader(dataset=trainset, batch_size=BATCH_SIZE, shuffle=True)

# Test split; kept in file order.
testset = NameDataset(is_train_set=False)
testloader = DataLoader(dataset=testset, batch_size=BATCH_SIZE, shuffle=False)

# Number of distinct countries in the training split; passed to RNNClassifier
# as its third argument in the training cell.
N_COUNTRY = trainset.getCountriesNum()

In [8]:
# Show the sorted, de-duplicated country labels collected from the training split.
trainset.country_list

['Arabic',
 'Chinese',
 'Czech',
 'Dutch',
 'English',
 'French',
 'German',
 'Greek',
 'Irish',
 'Italian',
 'Japanese',
 'Korean',
 'Polish',
 'Portuguese',
 'Russian',
 'Scottish',
 'Spanish',
 'Vietnamese']

In [None]:
if __name__ == '__main__':
    # Build the model. RNNClassifier, trainModel and testModel are expected to
    # be defined in other cells of this notebook.
    # NOTE(review): N_LAYER is set in the parameter cell but is not passed
    # here -- confirm whether RNNClassifier should receive it.
    classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRY)
    if USE_GPU:
        device = torch.device('cuda:0')
        classifier.to(device)

    # torch.nn has no `CrossEntropy`; the classification loss class is
    # `CrossEntropyLoss`.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

    # Wall-clock reference taken just before training starts.
    start = time.time()
    print("Training for %d epochs..." % N_EPOCHS)
    acc_list = []  # one value returned by testModel() per epoch
    for epoch in range(1, N_EPOCHS+1):
        trainModel()
        acc = testModel()
        acc_list.append(acc)