In [122]:
from argparse import Namespace

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import Conv1d
from torch.utils.data import Dataset

In [123]:
class MultilayerPerceptron(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_dim (int): number of features in each input vector
        hidden_dim (int): number of outputs of the first Linear layer
        output_dim (int): number of outputs of the second Linear layer
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x_in, apply_softmax=False):
        """Run the forward pass.

        Args:
            x_in (torch.Tensor): input whose last dimension is ``input_dim``
            apply_softmax (bool): when True, softmax is applied over dim=1
                of the second layer's output before returning

        Returns:
            torch.Tensor: the second layer's output, optionally softmax-ed.
        """
        hidden = torch.relu(self.fc1(x_in))
        logits = self.fc2(hidden)
        if not apply_softmax:
            return logits
        return torch.softmax(logits, dim=1)

In [124]:
# Sizes used for a quick smoke test of the perceptron.
batch_size = 2
input_dim = 3
hidden_dim = 100
output_dim = 4

# Build the model with explicit keyword arguments and show its layers.
mlp = MultilayerPerceptron(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)
print(mlp)

MultilayerPerceptron(
  (fc1): Linear(in_features=3, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=4, bias=True)
)


In [125]:
def describe(x):
    """Print a three-part summary of ``x``: its type, its shape and its values.

    Args:
        x: an object exposing a ``type()`` method and a ``shape`` attribute
            (the notebook passes torch tensors).
    """
    print(f"Type: {x.type()}")
    print(f"Shape/size: {x.shape}")
    print(f"Values: \n{x}")
    
# Draw a random input batch of shape (batch_size, input_dim) and inspect it.
x_input = torch.rand(batch_size, input_dim)
describe(x_input)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0.7881, 0.7023, 0.4458],
        [0.9591, 0.0927, 0.5944]])


In [126]:
# Forward pass without softmax: y_output holds the raw outputs of the last layer.
y_output = mlp(x_input, apply_softmax=False)
describe(y_output)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 4])
Values: 
tensor([[ 0.0153, -0.1098,  0.3523, -0.0426],
        [-0.0243, -0.2262,  0.4467, -0.0926]], grad_fn=<AddmmBackward0>)


In [127]:
# Same forward pass with softmax applied over dim=1; this overwrites y_output.
y_output = mlp(x_input, apply_softmax=True)
describe(y_output)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 4])
Values: 
tensor([[0.2366, 0.2088, 0.3314, 0.2233],
        [0.2297, 0.1877, 0.3679, 0.2146]], grad_fn=<SoftmaxBackward0>)


In [128]:
class SurnameDataset(Dataset):
    """Dataset that yields a vectorized surname and its nationality index.

    Only ``__getitem__`` is defined in this cell. The instance is expected to
    carry ``_target_df`` (a DataFrame with ``surname`` and ``nationality``
    columns) and ``_vectorizer``; both must be assigned elsewhere.
    """

    def __getitem__(self, index):
        """Return the data point stored at positional ``index``.

        Args:
            index (int): positional row index into ``self._target_df``

        Returns:
            dict: ``{'x_surname': <vectorized surname>,
            'y_nationality': <nationality index>}``
        """
        # Use the underscore-private attribute, matching `_vectorizer` and the
        # later CNN version of this class.
        row = self._target_df.iloc[index]
        surname_vector = self._vectorizer.vectorize(row.surname)
        nationality_index = \
            self._vectorizer.nationality_vocab.lookup_token(row.nationality)
        return {'x_surname': surname_vector,
                'y_nationality': nationality_index}

In [129]:
class SurnameVectorizer(object):
    """Turns surnames into collapsed one-hot vectors using two vocabularies."""

    def __init__(self, surname_vocab, nationality_vocab):
        """
        Args:
            surname_vocab: vocabulary mapping characters to integer indices
                (must support ``len()`` and ``lookup_token``)
            nationality_vocab: vocabulary mapping nationalities to indices
        """
        self.surname_vocab = surname_vocab
        self.nationality_vocab = nationality_vocab

    def vectorize(self, surname):
        """Encode ``surname`` as a collapsed one-hot vector.

        Args:
            surname (str): the surname to encode

        Returns:
            np.ndarray: float32 vector of length ``len(self.surname_vocab)``
            with a 1 at the index of every character present in ``surname``
            (repeated characters and character order are not represented).
        """
        vocab = self.surname_vocab
        one_hot = np.zeros(len(vocab), dtype=np.float32)
        for token in surname:
            one_hot[vocab.lookup_token(token)] = 1
        return one_hot

    @classmethod
    def from_dataframe(cls, surname_df):
        """Build a vectorizer from a DataFrame of surnames.

        Declared as a classmethod so it can be called on the class itself
        (``SurnameVectorizer.from_dataframe(df)``); without the decorator the
        DataFrame would be bound to ``cls``.

        Args:
            surname_df (pandas.DataFrame): rows exposing ``surname`` and
                ``nationality``

        Returns:
            SurnameVectorizer: instance holding the two populated vocabularies.
        """
        # NOTE(review): `Vocabulary` is not defined in the visible notebook;
        # it must be defined or imported before this method is called.
        surname_vocab = Vocabulary(unk_token="@")
        nationality_vocab = Vocabulary(add_unk=False)

        for _, row in surname_df.iterrows():
            for letter in row.surname:
                surname_vocab.add_token(letter)
            nationality_vocab.add_token(row.nationality)

        return cls(surname_vocab, nationality_vocab)

In [130]:
class SurnameClassifier(nn.Module):
    """Two-layer perceptron for classifying surnames.

    Args:
        input_dim (int): size of the input vectors
        hidden_dim (int): output size of the first Linear layer
        output_dim (int): output size of the second Linear layer
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SurnameClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x_in, apply_softmax=False):
        """Run the forward pass of the classifier.

        Args:
            x_in (torch.Tensor): input whose last dimension is ``input_dim``
            apply_softmax (bool): when True, apply softmax over dim=1 to the
                output of the second layer

        Returns:
            torch.Tensor: the prediction vector (softmax-ed if requested).
        """
        # Fixed: was `F.relu(self, fc1(x_in))`, which referenced an undefined
        # global `fc1` instead of the layer attribute.
        intermediate_vector = F.relu(self.fc1(x_in))
        prediction_vector = self.fc2(intermediate_vector)

        if apply_softmax:
            # Fixed: the misspelled `prediciton_vector` raised NameError.
            prediction_vector = F.softmax(prediction_vector, dim=1)

        return prediction_vector

In [131]:
# Settings for the surname MLP experiment, collected in a single Namespace.
args = Namespace(**{
    # Data and path information
    "surname_csv": "data/surnames/surnames_with_splits.csv",
    "vectorizer_file": "vectorizer.json",
    "model_state_file": "model.pth",
    "save_dir": "model_storage/ch4/surname_mlp",
    # Model hyperparameters
    "hidden_dim": 300,
    # Training hyperparameters
    "seed": 1337,
    "num_epochs": 100,
    "early_stopping_criteria": 5,
    "learning_rate": 0.001,
    "batch_size": 64,
})

In [132]:
class SurnameDataset(Dataset):
    """Redefinition of ``SurnameDataset`` that wraps model/optimizer setup code.

    NOTE(review): as written this class defines only ``__init__``, and
    ``__init__`` merely declares a nested function without calling it, so
    constructing an instance performs no work. Running this cell also replaces
    any earlier ``SurnameDataset`` definition in the kernel. The nested code
    looks like top-level setup statements that were indented into the class by
    mistake -- TODO confirm the intended structure.
    """
    
    def __init__(self, surname_csv):
        """Declare (but never invoke) the nested setup helper.

        Args:
            surname_csv: not used by this body.
        """
        
        
        def load_dataset_and_make_vectorizer(self, surname_csv):
            """Build dataset, vectorizer, classifier, loss and optimizer as locals.

            Nothing is returned or stored, and neither parameter is read: the
            CSV path comes from the global ``args`` instead.
            """

            # NOTE(review): `SurnameDataset.load_dataset_and_make_vectorizer` is
            # not defined as a class attribute in this cell (this function is
            # local to __init__), so this call would raise AttributeError.
            dataset = SurnameDataset.load_dataset_and_make_vectorizer(args.surname_csv)
            vectorizer = dataset.get_vectorizer()

            # Layer sizes are derived from the two vocabularies' lengths.
            classifier = SurnameClassifier(input_dim=len(vectorizer.surname_vocab),
                              hidden_dim=args.hidden_dim,
                              output_dim=len(vectorizer.nationality_vocab))

            # NOTE(review): the `args` Namespace shown in this notebook has no
            # `device` field -- it must be set before this line can run.
            classifier = classifier.to(args.device)

            # `dataset.class_weights` is passed as CrossEntropyLoss's `weight`.
            loss_func = nn.CrossEntropyLoss(dataset.class_weights)
            optimizer = optim.Adam(classifier.parameters(), lr=args.learning_rate)

In [133]:
class MultilayerPerceptron(nn.Module):
    """Two-layer perceptron with dropout on the hidden representation.

    Args:
        input_dim (int): size of the input vectors
        hidden_dim (int): output size of the first Linear layer
        output_dim (int): output size of the second Linear layer
        dropout_p (float): probability of zeroing a hidden unit while the
            module is in training mode (default 0.5)
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout_p=0.5):
        super(MultilayerPerceptron, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.dropout_p = dropout_p

    def forward(self, x_in, apply_softmax=False):
        """Run the forward pass.

        Args:
            x_in (torch.Tensor): input whose last dimension is ``input_dim``
            apply_softmax (bool): when True, apply softmax over dim=1 to the
                output of the second layer

        Returns:
            torch.Tensor: the second layer's output (softmax-ed if requested).
        """
        intermediate = F.relu(self.fc1(x_in))
        # F.dropout defaults to training=True, which would keep dropping units
        # after model.eval(); tie it to the module's mode instead.
        dropped = F.dropout(intermediate, p=self.dropout_p,
                            training=self.training)
        output = self.fc2(dropped)

        if apply_softmax:
            output = F.softmax(output, dim=1)
        return output

In [134]:
# Toy batch for the convolution demo: (batch, channels, length).
batch_size = 2
one_hot_size = 10
sequence_width = 7
data = torch.randn(batch_size, one_hot_size, sequence_width)

# Positional arguments are in_channels, out_channels, kernel_size.
conv1 = Conv1d(one_hot_size, 16, 3)
intermediate1 = conv1(data)

# Show the input size, then the size after one convolution.
print(data.size(), intermediate1.size(), sep="\n")

torch.Size([2, 10, 7])
torch.Size([2, 16, 5])


In [135]:
# Three stacked convolutions: the channel count grows 16 -> 32 -> 64 while
# every kernel_size=3 layer (no padding) shortens the length axis.
conv1 = nn.Conv1d(one_hot_size, 16, kernel_size=3)
conv2 = nn.Conv1d(16, 32, kernel_size=3)
conv3 = nn.Conv1d(32, 64, kernel_size=3)

# Feed each layer's output into the next one.
intermediate1 = conv1(data)
intermediate2 = conv2(intermediate1)
intermediate3 = conv3(intermediate2)

print(intermediate1.size(), intermediate2.size(), intermediate3.size(),
      sep="\n")

torch.Size([2, 16, 5])
torch.Size([2, 32, 3])
torch.Size([2, 64, 1])


In [136]:
# Remove only the length axis (dim 2). A bare squeeze() would also remove the
# batch axis whenever the batch holds a single item.
y_output = intermediate3.squeeze(dim=2)
print(y_output.size())

torch.Size([2, 64])


In [137]:
# Option 1: flatten channels and positions into one feature axis per item.
print(intermediate1.view(batch_size, -1).shape)

# Option 2: average over the length axis, leaving one value per channel.
print(intermediate1.mean(dim=2).shape)

torch.Size([2, 80])
torch.Size([2, 16])


In [138]:
class SurnameDataset(Dataset):
    """Dataset that yields a one-hot surname matrix and its nationality index.

    Only ``__getitem__`` is defined in this cell. The instance is expected to
    carry ``_target_df`` (a DataFrame with ``surname`` and ``nationality``
    columns) and ``_vectorizer``; both must be assigned elsewhere.
    """

    def __getitem__(self, index):
        """Return the data point stored at positional ``index``.

        Args:
            index (int): positional row index into ``self._target_df``

        Returns:
            dict: ``{'x_surname': <surname matrix>,
            'y_nationality': <nationality index>}``
        """
        row = self._target_df.iloc[index]

        # The vectorizer's `vectorize` takes only the surname and uses its own
        # stored maximum length, so no length argument is passed here.
        surname_matrix = self._vectorizer.vectorize(row.surname)

        nationality_index = \
            self._vectorizer.nationality_vocab.lookup_token(row.nationality)

        # Fixed: the misspelled `nationality_idex` raised NameError.
        return {'x_surname': surname_matrix,
                'y_nationality': nationality_index}

In [139]:
class SurnameVectorizer(object):
    """Turns surnames into one-hot matrices (characters x positions)."""

    def __init__(self, character_vocab, nationality_vocab, max_surname_length):
        """
        Args:
            character_vocab: vocabulary mapping characters to integer indices
                (must support ``len()`` and ``lookup_token``)
            nationality_vocab: vocabulary mapping nationalities to indices
            max_surname_length (int): number of columns in each one-hot matrix
        """
        self.character_vocab = character_vocab
        self.nationality_vocab = nationality_vocab
        self.max_surname_length = max_surname_length

    def vectorize(self, surname):
        """Encode ``surname`` as a one-hot matrix.

        Args:
            surname (str): the surname to encode; it must not be longer than
                ``self.max_surname_length`` or indexing raises IndexError

        Returns:
            np.ndarray: float32 matrix of shape
            ``(len(self.character_vocab), self.max_surname_length)`` where
            entry ``[c, p]`` is 1 when character index ``c`` occurs at
            position ``p``; unused trailing columns stay zero.
        """
        one_hot_matrix_size = (len(self.character_vocab), self.max_surname_length)
        # Fixed: the array was bound to the misspelled name `one_hot_matriz`.
        one_hot_matrix = np.zeros(one_hot_matrix_size, dtype=np.float32)

        for position_index, character in enumerate(surname):
            character_index = self.character_vocab.lookup_token(character)
            one_hot_matrix[character_index, position_index] = 1

        return one_hot_matrix

    @classmethod
    def from_dataframe(cls, surname_df):
        """Build a vectorizer from a DataFrame of surnames.

        Declared as a classmethod so it can be called on the class itself.

        Args:
            surname_df (pandas.DataFrame): rows exposing ``surname`` and
                ``nationality``

        Returns:
            SurnameVectorizer: instance holding both vocabularies and the
            length of the longest surname seen.
        """
        # NOTE(review): `Vocabulary` is not defined in the visible notebook;
        # it must be defined or imported before this method is called.
        character_vocab = Vocabulary(unk_token="@")
        nationality_vocab = Vocabulary(add_unk=False)
        max_surname_length = 0

        for _, row in surname_df.iterrows():
            max_surname_length = max(max_surname_length, len(row.surname))
            for letter in row.surname:
                # Fixed: previously added row.nationality to the character
                # vocabulary instead of the letter itself.
                character_vocab.add_token(letter)
            nationality_vocab.add_token(row.nationality)

        return cls(character_vocab, nationality_vocab, max_surname_length)

In [140]:
class SurnameClassifier(nn.Module):
    """Convolutional surname classifier: three Conv1d+ELU stages, then Linear.

    Args:
        initial_num_channels (int): channel count of the input tensor
        num_classes (int): size of the output prediction vector
        num_channels (int): channel count used by every convolution output
    """

    def __init__(self, initial_num_channels, num_classes, num_channels):
        super().__init__()

        # The first conv uses the default stride; the next two use stride 2.
        conv_layers = [
            nn.Conv1d(initial_num_channels, num_channels, kernel_size=3),
            nn.ELU(),
            nn.Conv1d(num_channels, num_channels, kernel_size=3, stride=2),
            nn.ELU(),
            nn.Conv1d(num_channels, num_channels, kernel_size=3, stride=2),
            nn.ELU(),
        ]
        self.convnet = nn.Sequential(*conv_layers)
        self.fc = nn.Linear(in_features=num_channels, out_features=num_classes)

    def forward(self, x_surname, apply_softmax=False):
        """Run the forward pass of the classifier.

        Args:
            x_surname (torch.Tensor): input of shape
                (batch, initial_num_channels, length)
            apply_softmax (bool): when True, apply softmax over dim=1 to the
                prediction vector

        Returns:
            torch.Tensor: prediction vector of shape (batch, num_classes).
        """
        conv_out = self.convnet(x_surname)
        # squeeze(dim=2) only removes the length axis when it has size 1, so
        # the conv stack must have reduced the input length to 1 by this point.
        features = conv_out.squeeze(dim=2)
        logits = self.fc(features)
        return F.softmax(logits, dim=1) if apply_softmax else logits

In [143]:
# Settings for the surname CNN experiment, collected in a single Namespace.
args = Namespace(**{
    # Data and path information
    "surname_csv": "data/surnames/surnames_with_splits.csv",
    "vectorizer_file": "vectorizer.json",
    "model_state_file": "model.pth",
    "save_dir": "model_storage/ch4/cnn",
    # Model hyperparameters
    "hidden_dim": 100,
    "num_channels": 256,
    # Training hyperparameters
    "seed": 1337,
    "learning_rate": 0.001,
    "batch_size": 128,
    "num_epochs": 100,
    "early_stopping_criteria": 5,
    "dropout_p": 0.1,
})

In [145]:
def predict_nationality(surname, classifier, vectorizer):
    """Predict the most likely nationality for a single surname.

    Args:
        surname (str): the surname to classify
        classifier: callable invoked as ``classifier(x, apply_softmax=True)``
            that returns one row of class probabilities per input item
        vectorizer: object providing ``vectorize(surname)`` and a
            ``nationality_vocab`` with ``lookup_index``

    Returns:
        dict: ``{'nationality': <looked-up label>, 'probability': <float>}``
        for the highest-probability class.
    """
    # Vectorize and add a leading batch axis of size 1.
    surname_batch = torch.tensor(vectorizer.vectorize(surname)).unsqueeze(dim=0)
    probabilities = classifier(surname_batch, apply_softmax=True)

    # Largest probability along the class axis and the index where it occurs.
    top_probability, top_index = torch.max(probabilities, dim=1)
    nationality = vectorizer.nationality_vocab.lookup_index(top_index.item())

    return {'nationality': nationality, "probability": top_probability.item()}