In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

2023-04-30 13:18:04.991581: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
train = pd.read_json('train.jsonl', lines=True)
valid = pd.read_json('valid.jsonl', lines=True)
test = pd.read_json('test.jsonl', lines=True)

all_letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .,;'"
n_letters = len(all_letters)

dataset = pd.concat([train, valid, test], ignore_index=True)
n_categories = dataset['country'].nunique()
all_categories = dataset['country'].unique()
categ_to_idx = {categ: idx for idx, categ in enumerate(all_categories)}
idx_to_categ = {v: k for k, v in categ_to_idx.items()}
longest_name_len = dataset['name'].str.len().max()

In [3]:
def letterToIndex(letter):
    return all_letters.find(letter)

def letterToTensor(letter):
    tensor = np.zeros((1, n_letters), dtype=np.float32)
    tensor[0][letterToIndex(letter)] = 1.0
    return tf.convert_to_tensor(tensor)

def lineToTensor(line):
    tensor = np.zeros((len(line), 1, n_letters), dtype=np.float32)
    for li, letter in enumerate(line):
        tensor[li][0][letterToIndex(letter)] = 1.0
    return tf.convert_to_tensor(tensor)


In [4]:
class RNN(tf.keras.Model):
    def __init__(self, input_size, hidden_size, output_size, sigma=0.01):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.U = tf.Variable(
            tf.random.normal((input_size, hidden_size)) * sigma)
        self.W = tf.Variable(
            tf.random.normal((hidden_size, hidden_size)) * sigma)
        self.b1 = tf.Variable(tf.zeros((hidden_size,)))
        self.relu = tf.keras.layers.ReLU()
        self.V = tf.Variable(
            tf.random.normal((hidden_size, output_size)) * sigma)
        self.b2 = tf.Variable(tf.zeros((output_size,)))
        self.softmax = tf.keras.layers.Softmax(axis=1)
        
    def call(self, inputs, hidden):
        z1 = tf.matmul(inputs, self.U)
        z2 = tf.matmul(hidden, self.W)
        hidden = self.relu(z1 + z2 + self.b1)
        z3 = tf.matmul(hidden, self.V)
        output = self.softmax(z3 + self.b2)
        return output, hidden
    
    def initHidden(self, batch_size):
        return tf.zeros((batch_size, self.hidden_size))
    
n_hidden = 128
rnn = RNN(n_letters, n_hidden, n_categories)

In [5]:
criterion = tf.keras.losses.SparseCategoricalCrossentropy()
learning_rate = 0.005
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
n_epochs = 20


def train_tensor(category_tensor, line_tensor):
    hidden = rnn.initHidden(batch_size=1)

    with tf.GradientTape() as tape:
        for i in range(line_tensor.shape[0]):
            output, hidden = rnn(line_tensor[i], hidden)
        loss = criterion(category_tensor, output)
        gradients = tape.gradient(loss, rnn.trainable_variables)
        
    optimizer.apply_gradients(zip(gradients, rnn.trainable_variables))

    return output, loss.numpy()

def evaluate_tensor(line_tensor):
    hidden = rnn.initHidden(batch_size=1)

    for i in range(line_tensor.shape[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    return output


In [10]:
print(category_tensor)
print(output)

tf.Tensor([1], shape=(1,), dtype=int64)
tf.Tensor(
[[0.05551922 0.05559709 0.0555092  0.05557127 0.05554948 0.05553937
  0.05551817 0.05559023 0.0555577  0.05545083 0.05555725 0.05555254
  0.05563111 0.05554031 0.05555001 0.05561183 0.05555777 0.05559664]], shape=(1, 18), dtype=float32)


In [6]:
train_acc_list = []
valid_acc_list = []
loss_list = []

for epoch in range(n_epochs):
    print(f'Epoch: {epoch+1} / {n_epochs}')
    
    # calculate the accuracy on train set
    correct_train = 0
    total_train = 0
    for _, row in train.iterrows():
        category = row['country']
        line = row['name']
        category_tensor = tf.constant([categ_to_idx[category]], dtype=tf.int64)
        line_tensor = lineToTensor(line)
        
        output = evaluate_tensor(line_tensor)
        pred = tf.argmax(output, axis=1)
        if pred.numpy()[0] == category_tensor.numpy()[0]:
            correct_train += 1
            
    # calculate the accuracy on train set
    train_acc = correct_train / len(train)
    print(f'train_acc: {train_acc}')
    train_acc_list.append(train_acc)
    
    # calculate the accuracy on valid set
    correct_val = 0
    total_val = 0
    for _, row in valid.iterrows():
        category = row['country']
        line = row['name']
        category_tensor = tf.constant([categ_to_idx[category]], dtype=tf.int64)
        line_tensor = lineToTensor(line)
        
        output = evaluate_tensor(line_tensor)
        pred = tf.argmax(output, axis=1)
        if pred.numpy()[0] == category_tensor.numpy()[0]:
            correct_val += 1
            
    # calculate the accuracy on valid set
    valid_acc = correct_val / len(valid)
    print(f'valid_acc: {valid_acc}')
    valid_acc_list.append(valid_acc)
    
    # do training on the train set
    for _, row in train.iterrows():
        category = row['country']
        line = row['name']
        category_tensor = tf.constant([categ_to_idx[category]], dtype=tf.int64)
        line_tensor = lineToTensor(line)
        
        # forward pass
        output, loss = train_tensor(category_tensor, line_tensor)
    
    loss_list.append(float(loss))


Epoch: 1 / 20


KeyboardInterrupt: 

In [None]:
# save train_acc_values = [] val_acc_values = [] loss_values = [] to a json file
import json
with open("tensorflow_results.json" , "w") as f:
    json.dump({"train_acc": train_acc_list, "valid_acc": valid_acc_list, "loss": loss_list}, f)