# Thinking in tensors in PyTorch

Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019).


## Text generation

* [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/) by Andrej Karpathy
* [RecurrentJS](http://cs.stanford.edu/people/karpathy/recurrentjs) - an in-browser demo by Andrej Karpathy
* [Unsupervised sentiment neuron by OpenAI](https://openai.com/blog/unsupervised-sentiment-neuron/)
* [Generating Magic cards using deep, recurrent neural networks](https://www.mtgsalvation.com/forums/magic-fundamentals/custom-card-creation/612057-generating-magic-cards-using-deep-recurrent-neural)

Other

* [Training a Keras model to generate colors](https://heartbeat.fritz.ai/how-to-train-a-keras-model-to-generate-colors-3bc79e54971b)


## Various practical links

* [What is the best way to remove accents in a Python unicode string?](https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string)

In [None]:
!pip install unidecode

In [None]:
import numpy as np
import pandas as pd
from collections import Counter
from unidecode import unidecode
from sklearn.model_selection import train_test_split

import torch
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset, DataLoader

from livelossplot import PlotLosses

In [None]:
# Load the surnames dataset; later cells read its 'name' and 'language' columns.
names = pd.read_csv("https://www.dropbox.com/s/nu2y0p3i2jvwfki/surnames.csv?dl=1")

In [None]:
# Summary of the DataFrame: columns, dtypes and non-null counts.
names.info()

In [None]:
# Peek at 5 randomly chosen rows.
names.sample(5)

In [None]:
# Number of surnames per language, i.e. the class balance of the target.
names['language'].value_counts()

In [None]:
# How many names there are of each length (in characters), ordered by length.
names['name'].apply(len).value_counts().sort_index()

In [None]:
# Count every character of the raw (untransliterated) names.
letters_all = Counter(ch for raw_name in names['name'] for ch in raw_name)

In [None]:
# Raw character frequencies, most frequent first.
letters_all.most_common()

In [None]:
# Count characters again, this time after passing each name through unidecode.
letters = Counter(ch for raw_name in names['name'] for ch in unidecode(raw_name))
letters.most_common()

In [None]:
# Character <-> integer id lookup tables; ids follow the insertion order of `letters`.
char2id = {ch: idx for idx, ch in enumerate(letters)}
id2char = {idx: ch for ch, idx in char2id.items()}

In [None]:
# Inspect the character -> id mapping.
char2id

In [None]:
# Language -> integer class id, numbered in the order returned by value_counts().
_langs_by_count = names['language'].value_counts().index
lang2id = dict(zip(_langs_by_count, range(len(_langs_by_count))))
lang2id

In [None]:
# Fixed sequence length of the encoded names; longer names are truncated.
max_len = 20

# X holds one row of character ids per name. Every position starts as the
# "end" id (one past the largest character id), so names shorter than
# max_len are right-padded with it.
X = np.full((len(names), max_len), len(char2id), dtype=np.int64)

# Y holds the integer language id of each name.
Y = np.zeros(len(names), dtype=np.int64)

# Enumerate positionally and read the columns by name, so the loop does not
# depend on the DataFrame index labels or on the column order.
for i, (name, lang) in enumerate(zip(names['name'], names['language'])):
    Y[i] = lang2id[lang]
    # Slice to max_len: transliteration can lengthen a name, and writing past
    # the last column would raise IndexError.
    for j, c in enumerate(unidecode(name)[:max_len]):
        X[i, j] = char2id[c]

In [None]:
# Expected: (number of names, max_len).
X.shape

In [None]:
# First five encoded names; trailing entries equal to len(char2id) are the end/padding id.
X[:5]

In [None]:
# Hold out 25% of the rows for validation; the fixed random_state makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=42)

In [None]:
def _as_loader(features, targets, shuffle):
    """Wrap a pair of NumPy arrays as a DataLoader with one example per batch."""
    dataset = TensorDataset(torch.from_numpy(features).long(),
                            torch.from_numpy(targets).long())
    return DataLoader(dataset, batch_size=1, shuffle=shuffle)


# Only the training split is reshuffled; the validation order stays fixed.
trainloader = _as_loader(X_train, Y_train, shuffle=True)
testloader = _as_loader(X_test, Y_test, shuffle=False)

# Keys match the phase names used by the training loop.
dataloaders = dict(train=trainloader, validation=testloader)

In [None]:
# train on cuda if available
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def train_model(model, criterion, optimizer, num_epochs=10):
    """Train `model` and evaluate it after every epoch, plotting metrics live.

    Batches come from the module-level `dataloaders` dict (keys 'train' and
    'validation'); the model and every batch are moved to the module-level
    `device`. After each epoch the mean loss and accuracy of both phases are
    pushed to a livelossplot `PlotLosses` chart.

    Args:
        model: module mapping a batch of inputs to per-class scores of shape
            (batch, classes).
        criterion: loss called as `criterion(outputs, labels)`; it is weighted
            by batch size when accumulated, so it should return a per-batch mean.
        optimizer: optimizer already bound to `model`'s parameters.
        num_epochs: number of passes over both data loaders.

    Returns:
        None. The model is updated in place.
    """
    liveloss = PlotLosses()
    model = model.to(device)

    for epoch in range(num_epochs):
        logs = {}
        for phase in ['train', 'validation']:
            is_training = phase == 'train'
            # Equivalent to model.train() / model.eval().
            model.train(is_training)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Only record the autograd graph when we will backpropagate;
                # during validation it would just cost time and memory.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                if is_training:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                _, preds = torch.max(outputs, 1)
                # Weight by batch size so the epoch mean is per example.
                running_loss += loss.detach() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.float() / len(dataloaders[phase].dataset)

            # livelossplot pairs a metric with its 'val_'-prefixed counterpart.
            prefix = ''
            if phase == 'validation':
                prefix = 'val_'

            logs[prefix + 'log loss'] = epoch_loss.item()
            logs[prefix + 'accuracy'] = epoch_acc.item()

        liveloss.update(logs)
        liveloss.draw()

In [None]:
class RecurrentLSTM(nn.Module):
    """Character-level classifier: embedding -> single-layer LSTM -> linear layer.

    The vocabulary size and the number of output classes are read from the
    module-level `char2id` and `lang2id` mappings. The class scores are
    computed from the LSTM's final *cell* state (not its hidden state).
    """

    def __init__(self, hidden_size):
        super().__init__()
        # One extra embedding row for the end/padding id, which is len(char2id).
        vocab_size = len(char2id) + 1
        n_classes = len(lang2id)

        self.emb = nn.Embedding(vocab_size, hidden_size)
        # input_size is the number of channels (the embedding dimension),
        # NOT the sequence length.
        self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size)
        self.fc = nn.Linear(hidden_size, n_classes)

    def forward(self, x):
        embedded = self.emb(x)
        # nn.LSTM defaults to sequence-first input: (B, L, C) -> (L, B, C).
        seq_first = embedded.transpose(0, 1)
        _, (_, final_cell) = self.lstm(seq_first)
        # final_cell is (1, B, C); drop the leading layer axis after the linear map.
        return self.fc(final_cell).squeeze(0)

In [None]:
# A single encoded training name (batch of one) on `device`, used to try out a forward pass.
X_example = torch.from_numpy(X_train[:1]).long().to(device)

In [None]:
# Put the model on the same device as X_example; otherwise the forward-pass
# check below fails with a device mismatch whenever CUDA is available.
model = RecurrentLSTM(16).to(device)
model

In [None]:
# Try a forward pass of the untrained model on the single example.
model(X_example)

In [None]:
# Start from a freshly initialized model so this run does not depend on the
# instance created earlier.
model = RecurrentLSTM(hidden_size=16)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

train_model(model=model, criterion=criterion, optimizer=optimizer, num_epochs=1)