In [None]:
%%writefile requirements.txt
torch
numpy
pandas
scikit-learn
razdel

In [None]:
!pip install --upgrade -r requirements.txt

## Датасет

In [None]:
!rm negative.csv
!rm positive.csv
!wget https://www.dropbox.com/s/r6u59ljhhjdg6j0/negative.csv
!wget https://www.dropbox.com/s/fnpq3z4bcnoktiv/positive.csv

In [None]:
import torch
import re
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

n = ['id', 'date', 'name', 'text', 'typr', 'rep', 'rtw', 'faw', 'stcount', 'foll', 'frien', 'listcount']
data_positive = pd.read_csv('positive.csv', sep=';', error_bad_lines=False, names=n, usecols=['text'])
data_negative = pd.read_csv('negative.csv', sep=';', error_bad_lines=False, names=n, usecols=['text'])

sample_size = min(data_positive.shape[0], data_negative.shape[0])
raw_data = np.concatenate((data_positive['text'].values[:sample_size], data_negative['text'].values[:sample_size]), axis=0)

def preprocess_text(text):
    text = text.lower().replace("ё", "е")
    text = re.sub('((www\.[^\s]+)|(https?://[^\s]+))', 'URL', text)
    text = re.sub('@[^\s]+', 'USER', text)
    text = re.sub('[^a-zA-Zа-яА-Я1-9]+', ' ', text)
    text = re.sub(' +', ' ', text)
    return text.strip()

df_train = pd.DataFrame(columns=['text', 'label'])
df_val = pd.DataFrame(columns=['text', 'label'])
df_test = pd.DataFrame(columns=['text', 'label'])

data = [preprocess_text(t) for t in raw_data]
labels = [1] * sample_size + [0] * sample_size
df_train['text'], df_test['text'], df_train['label'], df_test['label'] = train_test_split(data, labels, test_size=0.2, random_state=1)
df_train, df_val = train_test_split(df_train, test_size=0.2, random_state=1)
df_train

In [None]:
from collections import Counter
from razdel import tokenize


class Vocabulary:
    def __init__(self):
        self.word2index = {
            "<pad>": 0,
            "<unk>": 1
        }
        self.index2word = ["<pad>", "<unk>"]

    def build(self, texts, min_count=7):
        words_counter = Counter(token for tokens in texts for token in tokens)
        for word, count in words_counter.most_common():
            if count >= min_count:
                self.word2index[word] = len(self.word2index)
        self.index2word = [word for word, _ in sorted(self.word2index.items(), key=lambda x: x[1])]
    
    @property
    def size(self):
        return len(self.index2word)
    
    def top(self, n=100):
        return self.index2word[1:n+1]
    
    def get_index(self, word):
        return self.word2index.get(word, 0)
    
    def get_word(self, index):
        return self.index2word[index]

vocabulary = Vocabulary()
train_texts = df_train["text"].tolist()
train_texts = [[token.text for token in tokenize(text)] for text in train_texts]
vocabulary.build(train_texts)
assert vocabulary.word2index[vocabulary.index2word[10]] == 10
print(vocabulary.size)
print(vocabulary.top(100))

17198
['<unk>', 'USER', 'не', 'я', 'и', 'в', 'на', 'rt', 'а', 'что', 'URL', 'с', 'как', 'у', 'все', 'меня', 'то', 'это', 'так', 'мне', 'd', 'но', 'ты', 'ну', 'по', 'за', 'еще', 'уже', 'вот', 'да', 'же', 'только', 'сегодня', 'о', 'бы', 'нет', 'когда', 'хочу', 'к', 'очень', 'тебя', 'из', 'он', '3', 'день', 'просто', 'мы', 'будет', '2', 'от', 'было', 'если', 'тебе', 'теперь', 'надо', 'даже', 'тоже', 'завтра', 'кто', 'до', 'там', 'его', '1', 'вообще', 'есть', 'для', 'она', 'сейчас', 'спасибо', 'нас', 'буду', 'почему', 'блин', 'могу', 'люблю', 'без', 'знаю', 'вы', 'они', 'тут', 'или', 'раз', 'мой', 'чем', 'ничего', 'со', 'больше', 'всегда', '5', 'хорошо', 'дома', 'про', 'всем', 'можно', 'ее', 'может', 'год', 'потом', 'был', 'спать']


In [None]:
train_texts[10:20]

[['rt',
  'USER',
  'а',
  'здесь',
  'гарри',
  'вообще',
  'подмышки',
  'брызгал',
  'похуй',
  'вообще',
  'на',
  'камеры',
  'х',
  'URL'],
 ['rt',
  'USER',
  'работники',
  'метро',
  'не',
  'обнимаются',
  'им',
  'нельзя',
  'прижиматься',
  'URL'],
 ['USER',
  'но',
  'думаю',
  'там',
  'подвох',
  'какой',
  'не',
  'буду',
  'не',
  'заснет',
  'еще',
  'потом'],
 ['USER',
  'дело',
  'плохо',
  'ватси',
  'у',
  'меня',
  'интернет',
  'отобрали',
  'я',
  'умираю'],
 ['капец', 'посорилась', 'фиг', 'теперь', 'в', 'интернет', 'зайду'],
 ['USER', 'как', 'жаль', 'что', 'тебя', 'нельзя', 'ретвитнуть'],
 ['разговор',
  'двух',
  'мелких',
  'девочек',
  'ксюша',
  'там',
  'страшно',
  'я',
  'вообше',
  'то',
  'храбрая',
  'я',
  'не',
  'боюсь',
  'ахахаха',
  'без',
  'комментариев',
  'd'],
 ['вчера',
  'взяла',
  'и',
  'просто',
  'сказала',
  'что',
  'переболела',
  'им',
  'а',
  'он',
  'мне',
  'молодец'],
 ['rt',
  'USER',
  'USER',
  'принес',
  'ноут',
  'я',


In [None]:
train_labels = df_train["label"].tolist()
val_labels = df_val["label"].tolist()
test_labels = df_test["label"].tolist()
train_texts = [[token.text for token in tokenize(text)] for text in df_train["text"].tolist()]
val_texts = [[token.text for token in tokenize(text)] for text in df_val["text"].tolist()]
test_texts = [[token.text for token in tokenize(text)] for text in df_test["text"].tolist()]

In [None]:
train_texts[10:20]

[['rt',
  'USER',
  'а',
  'здесь',
  'гарри',
  'вообще',
  'подмышки',
  'брызгал',
  'похуй',
  'вообще',
  'на',
  'камеры',
  'х',
  'URL'],
 ['rt',
  'USER',
  'работники',
  'метро',
  'не',
  'обнимаются',
  'им',
  'нельзя',
  'прижиматься',
  'URL'],
 ['USER',
  'но',
  'думаю',
  'там',
  'подвох',
  'какой',
  'не',
  'буду',
  'не',
  'заснет',
  'еще',
  'потом'],
 ['USER',
  'дело',
  'плохо',
  'ватси',
  'у',
  'меня',
  'интернет',
  'отобрали',
  'я',
  'умираю'],
 ['капец', 'посорилась', 'фиг', 'теперь', 'в', 'интернет', 'зайду'],
 ['USER', 'как', 'жаль', 'что', 'тебя', 'нельзя', 'ретвитнуть'],
 ['разговор',
  'двух',
  'мелких',
  'девочек',
  'ксюша',
  'там',
  'страшно',
  'я',
  'вообше',
  'то',
  'храбрая',
  'я',
  'не',
  'боюсь',
  'ахахаха',
  'без',
  'комментариев',
  'd'],
 ['вчера',
  'взяла',
  'и',
  'просто',
  'сказала',
  'что',
  'переболела',
  'им',
  'а',
  'он',
  'мне',
  'молодец'],
 ['rt',
  'USER',
  'USER',
  'принес',
  'ноут',
  'я',


In [None]:
np.random.seed(42)
torch.manual_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import time

def get_next_batch(texts, labels, max_seq_len=100, batch_size=128):
    indices = np.arange(len(texts))
    np.random.shuffle(indices)
    batch_begin = 0
    while batch_begin < len(texts):
        batch_indices = indices[batch_begin: batch_begin + batch_size]
        batch = []
        batch_labels = []
        batch_max_len = 0
        for data_ind in batch_indices:
            batch_labels.append(labels[data_ind])
            sample = [vocabulary.get_index(token) for token in texts[data_ind]][:max_seq_len]
            batch_max_len = max(batch_max_len, len(sample))
            sample += [0] * (max_seq_len - len(sample))
            batch.append(sample)
        batch_begin += batch_size
        batch = torch.cuda.LongTensor(batch)[:, :batch_max_len]
        yield batch, torch.cuda.LongTensor(batch_labels)


def train_model(model, texts, labels, val_texts, val_labels, epochs_count=10, 
                loss_every_nsteps=1000, lr=0.01, save_path="model.pt", device_name="cuda"):
    params_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Trainable params: {}".format(params_count))
    device = torch.device(device_name)
    model = model.to(device)
    total_loss = 0
    start_time = time.time()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_function = nn.CrossEntropyLoss().cuda()
    prev_avg_val_loss = None
    for epoch in range(epochs_count):
        model.train()
        for step, (batch, batch_labels) in enumerate(get_next_batch(texts, labels)):
            logits = model(batch) # Прямой проход
            loss = loss_function(logits, batch_labels) # Подсчёт ошибки
            loss.backward() # Подсчёт градиентов dL/dw
            optimizer.step() # Градиентный спуск или его модификации (в данном случае Adam)
            optimizer.zero_grad() # Зануление градиентов, чтобы их спокойно менять на следующей итерации
            total_loss += loss.item()
        val_total_loss = 0
        val_batch_count = 0
        model.eval()
        for _, (batch, batch_labels) in enumerate(get_next_batch(val_texts, val_labels)):
            logits = model(batch) # Прямой проход
            val_total_loss += loss_function(logits, batch_labels) # Подсчёт ошибки
            val_batch_count += 1
        avg_val_loss = val_total_loss/val_batch_count
        print("Epoch = {}, Avg Train Loss = {:.4f}, Avg val loss = {:.4f}, Time = {:.2f}s".format(epoch, total_loss / loss_every_nsteps, avg_val_loss, time.time() - start_time))
        total_loss = 0
        start_time = time.time()

        if prev_avg_val_loss is not None and avg_val_loss > prev_avg_val_loss:
            model.load_state_dict(torch.load(save_path))
            model.eval()
            break
        prev_avg_val_loss = avg_val_loss
        torch.save(model.state_dict(), save_path)

In [None]:
from sklearn.metrics import accuracy_score

def test_model(model, texts, labels):
    predicted_labels = []
    true_labels = []
    model.eval()
    for step, (batch, batch_labels) in enumerate(get_next_batch(texts, labels)):
        logits = model(batch) # Прямой проход
        predicted_labels.extend(torch.max(logits.detach().cpu(), 1)[1].numpy())
        true_labels.extend(batch_labels.detach().cpu().numpy())
    print(accuracy_score(true_labels, predicted_labels))

## Сеть прямого распространения

## Коротко о Word2Vec
Обучение:

![embeddings training](https://miro.medium.com/max/1400/0*o2FCVrLKtdcxPQqc.png)
*From [An implementation guide to Word2Vec using NumPy and Google Sheets
](https://towardsdatascience.com/an-implementation-guide-to-word2vec-using-numpy-and-google-sheets-13445eebd281)*

![embeddings relations](https://www.tensorflow.org/images/linear-relationships.png)
*From [Vector Representations of Words, Tensorflow tutorial](https://www.tensorflow.org/tutorials/representation/word2vec)*

Статьи:
* Word2Vec: [Distributed Representations of Words and Phrases
and their Compositionality](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf), Mikolov et al., 2013
* GloVe: [GloVe: Global Vectors for Word Representation](https://nlp.stanford.edu/pubs/glove.pdf), Pennington, Socher, Manning, 2014
* fastText: [Enriching Word Vectors with Subword Information](https://arxiv.org/pdf/1607.04606.pdf), Bojanowski, Grave, Joulin, Mikolov, 2016

Ссылки:
* Word2Vec и fasttext модели для русского: https://rusvectores.org/ru/
* fasttext для кучи языков: https://fasttext.cc/
* Ещё fasttext модели для русского: http://docs.deeppavlov.ai/en/master/features/pretrained_vectors.html
* Отдельная библиотека для русских векторов: https://github.com/natasha/navec
* Word2Vec для кучи языков, обученная на Вики: https://wikipedia2vec.github.io/wikipedia2vec/pretrained/
* Word2Vec для английского от Гугла: https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM
* Огромная Word2Vec модель для русского: https://zenodo.org/record/400631#.Xa4RPN9fjCI

In [None]:
class FFModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim=64, hidden_dim=48):
        super().__init__()
        
        self.embeddings_layer = nn.Embedding(vocab_size, embedding_dim)
        self.hidden_layer = nn.Linear(embedding_dim, hidden_dim)
        self.relu_layer = nn.ReLU()
        self.dropout_layer = nn.Dropout(0.2)
        self.out_layer = nn.Linear(hidden_dim, 2)

    def forward(self, inputs):
        projections = self.embeddings_layer.forward(inputs)
        projections = self.dropout_layer(self.relu_layer(self.hidden_layer(projections)))
        pooling = torch.max(projections, 1)[0]
        output = self.out_layer.forward(pooling)
        return output

model = FFModel(vocabulary.size, 64)
train_model(model, train_texts, train_labels, val_texts, val_labels)
test_model(model, test_texts, test_labels)

Trainable params: 1103890
Epoch = 0, Avg Train Loss = 0.5981, Avg val loss = 0.5167, Time = 6.97s
Epoch = 1, Avg Train Loss = 0.5253, Avg val loss = 0.4989, Time = 4.59s
Epoch = 2, Avg Train Loss = 0.5028, Avg val loss = 0.5065, Time = 4.12s
0.7443600625418807


## Свёрточная сеть
![Conv example](https://image.ibb.co/e6t8ZK/Convolution.gif)

*From [Feature extraction using convolution](http://deeplearning.stanford.edu/wiki/index.php/Feature_extraction_using_convolution).*
![NLP conv example](https://user-images.githubusercontent.com/6512394/41590312-b1c28fca-73f1-11e8-9123-e26a03853cc7.png)

*From [(Text-Classification-Pytorch)](https://github.com/dongjun-Lee/text-classification-models-tf)*


In [None]:
class CnnModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim=32, filters=((2, 10), (3, 8))):
        super().__init__()
        
        self.embeddings_layer = nn.Embedding(vocab_size, embedding_dim)
        self.filters = []
        all_filters_count = 0
        for kernel_size, filters_count in filters:
            all_filters_count += filters_count
            self.filters.append(nn.Conv2d(1, filters_count, (kernel_size, embedding_dim), padding=(1, 0)))
        self.filters = nn.ModuleList(self.filters)
        self.relu_layer = nn.ReLU()
        self.dropout_layer = nn.Dropout(0.2)
        self.out_layer = nn.Linear(all_filters_count, 2)

    def forward(self, inputs):
        projections = self.embeddings_layer.forward(inputs)
        projections = projections.unsqueeze(1)
        # print(projections.size())
        results = []
        for f in self.filters:
            convolved = self.dropout_layer(self.relu_layer(f(projections))).squeeze(3)
            pooling = torch.max(convolved, 2)[0]
            results.append(pooling)
        output = torch.cat(results, 1)
        output = self.out_layer.forward(output)
        return output

model = CnnModel(vocabulary.size, 64)
train_model(model, train_texts, train_labels, val_texts, val_labels)
test_model(model, test_texts, test_labels)

Trainable params: 1103544
Epoch = 0, Avg Train Loss = 0.5918, Avg val loss = 0.4833, Time = 10.82s
Epoch = 1, Avg Train Loss = 0.5021, Avg val loss = 0.4737, Time = 6.63s
Epoch = 2, Avg Train Loss = 0.4555, Avg val loss = 0.4797, Time = 6.49s
0.7586330131784678


## Рекуррентные сети

![rnn](http://colah.github.io/posts/2015-08-Understanding-LSTMs/img/LSTM3-chain.png)  
*From [(Understanding LSTM Networks)](http://colah.github.io/posts/2015-08-Understanding-LSTMs)*

In [None]:
class RnnModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim=32, filters=((2, 10), (3, 8))):
        super().__init__()
        
        self.embeddings_layer = nn.Embedding(vocab_size, embedding_dim)
        self.lstm_layer = nn.LSTM(embedding_dim, embedding_dim, batch_first=True)
        self.dropout_layer = nn.Dropout(0.2)
        self.out_layer = nn.Linear(embedding_dim, 2)

    def forward(self, inputs):
        projections = self.embeddings_layer.forward(inputs)
        output, (final_hidden_state, final_cell_state) = self.lstm_layer(projections)
        hidden = self.dropout_layer(final_hidden_state[-1])
        output = self.out_layer.forward(hidden)
        return output

model = RnnModel(vocabulary.size, 64)
train_model(model, train_texts, train_labels, val_texts, val_labels)
test_model(model, test_texts, test_labels)

Trainable params: 1134082
Epoch = 0, Avg Train Loss = 0.6185, Avg val loss = 0.4844, Time = 5.64s
Epoch = 1, Avg Train Loss = 0.4971, Avg val loss = 0.4782, Time = 4.69s
Epoch = 2, Avg Train Loss = 0.4488, Avg val loss = 0.4833, Time = 4.65s
0.7616707616707616


# Генерация имён: 


In [None]:
!wget https://download.pytorch.org/tutorial/data.zip
!unzip data.zip

--2022-08-22 18:01:11--  https://download.pytorch.org/tutorial/data.zip
Resolving download.pytorch.org (download.pytorch.org)... 99.86.38.96, 99.86.38.106, 99.86.38.72, ...
Connecting to download.pytorch.org (download.pytorch.org)|99.86.38.96|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2882130 (2.7M) [application/zip]
Saving to: ‘data.zip’


2022-08-22 18:01:11 (63.6 MB/s) - ‘data.zip’ saved [2882130/2882130]

Archive:  data.zip
   creating: data/
  inflating: data/eng-fra.txt        
   creating: data/names/
  inflating: data/names/Arabic.txt   
  inflating: data/names/Chinese.txt  
  inflating: data/names/Czech.txt    
  inflating: data/names/Dutch.txt    
  inflating: data/names/English.txt  
  inflating: data/names/French.txt   
  inflating: data/names/German.txt   
  inflating: data/names/Greek.txt    
  inflating: data/names/Irish.txt    
  inflating: data/names/Italian.txt  
  inflating: data/names/Japanese.txt  
  inflating: data/names/Korean.txt  

In [None]:
import os
from collections import defaultdict
def load_names(directory):
    dataset = defaultdict(list)
    languages = {}
    for language_num, file_name in enumerate(os.listdir(directory)):
        languages[language_num] = file_name
        file_name = os.path.join(directory, file_name)
        with open(file_name, "r") as r:
            for line in r:
                dataset[language_num].append(line.strip())
    return dataset, languages

dataset, languages = load_names("data/names")

In [None]:
dataset

defaultdict(list,
            {0: ['Abana',
              'Abano',
              'Abarca',
              'Abaroa',
              'Abascal',
              'Abasolo',
              'Abel',
              'Abelló',
              'Aberquero',
              'Abreu',
              'Acosta',
              'Agramunt',
              'Aiza',
              'Alamilla',
              'Albert',
              'Albuquerque',
              'Aldana',
              'Alfaro',
              'Alvarado',
              'Álvarez',
              'Alves',
              'Amador',
              'Andreu',
              'Antúnez',
              'Aqua',
              'Aquino',
              'Araújo',
              'Araullo',
              'Araya',
              'Arce',
              'Arechavaleta',
              'Arena',
              'Aritza',
              'Armando',
              'Arreola',
              'Arriola',
              'Asis',
              'Asturias',
              'Avana',
              'Azarola',
     

In [None]:
import random
train_names = []
val_names = []
train_labels = []
val_labels = []
for language_num, names in dataset.items():
    random.shuffle(names)
    border = int(len(names) * 0.9)
    train_names.extend(names[:border])
    val_names.extend(names[border:])
    train_labels.extend([language_num] * border)
    val_labels.extend([language_num] * (len(names) - border))

In [None]:
char_set = ["<pad>"] + list({ch for name in train_names + val_names for ch in name})
print(char_set)

['<pad>', 'Ż', 'ò', 'ü', "'", 'ż', 'É', 'ú', 'J', 'X', 'y', 'Ś', 't', 'G', 'd', 'c', 'K', 'ł', 'q', 'k', ':', 'õ', 'a', 'H', '\xa0', '1', 'n', 'x', 'T', 'à', 'R', ',', 'M', 'Y', '-', 'ö', 'e', 'U', 'V', 'v', 'z', '/', 'Á', 'b', 'u', 'f', 'í', 'è', 'l', 'ç', 'ń', 'ê', 'L', 's', 'P', 'ì', 'Q', 'ą', 'D', 'ß', 'A', 'p', 'i', 'N', 'Z', 'g', 'O', 'B', 'I', 'ä', 'E', 'j', 'é', 'F', 'ñ', 'ó', 'ã', 'm', 'r', 'o', 'w', 'S', 'W', 'C', 'á', 'ù', ' ', 'h']


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import time
import numpy as np

def get_next_gen_batch(names, labels, max_seq_len=100, batch_size=128):
    indices = np.arange(len(names))
    np.random.shuffle(indices)
    batch_begin = 0
    while batch_begin < len(names):
        batch_indices = indices[batch_begin: batch_begin + batch_size]
        batch = []
        batch_labels = []
        batch_max_len = 0
        for data_ind in batch_indices:
            batch_labels.append(labels[data_ind])
            sample = [char_set.index(ch) for ch in names[data_ind]][:max_seq_len]
            batch_max_len = max(batch_max_len, len(sample))
            sample += [0] * (max_seq_len - len(sample))
            batch.append(sample)
        batch_begin += batch_size
        batch = torch.cuda.LongTensor(batch)[:, :batch_max_len]
        yield batch, torch.cuda.LongTensor(batch_labels)


def train_gen_model(model, names, labels, val_names, val_labels, epochs_count=10, 
                loss_every_nsteps=1000, lr=0.01, save_path="model.pt", device_name="cuda"):
    params_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Trainable params: {}".format(params_count))
    device = torch.device(device_name)
    model = model.to(device)
    total_loss = 0
    start_time = time.time()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_function = nn.CrossEntropyLoss().cuda()
    prev_avg_val_loss = None
    for epoch in range(epochs_count):
        model.train()
        for step, (batch, batch_labels) in enumerate(get_next_gen_batch(names, labels)):
            logits = model(batch_labels, batch)[:, :-1] # Прямой проход
            logits = logits.transpose(1, 2)
            loss = loss_function(logits, batch) # Подсчёт ошибки
            loss.backward() # Подсчёт градиентов dL/dw
            optimizer.step() # Градиентный спуск или его модификации (в данном случае Adam)
            optimizer.zero_grad() # Зануление градиентов, чтобы их спокойно менять на следующей итерации
            total_loss += loss.item()
        val_total_loss = 0
        val_batch_count = 0
        model.eval()
        for _, (batch, batch_labels) in enumerate(get_next_gen_batch(val_names, val_labels)):
            logits = model(batch_labels, batch)[:, :-1] # Прямой проход
            logits = logits.transpose(1, 2)
            val_total_loss += loss_function(logits, batch) # Подсчёт ошибки
            val_batch_count += 1
        avg_val_loss = val_total_loss/val_batch_count
        print("Epoch = {}, Avg Train Loss = {:.4f}, Avg val loss = {:.4f}, Time = {:.2f}s".format(epoch, total_loss / loss_every_nsteps, avg_val_loss, time.time() - start_time))
        total_loss = 0
        start_time = time.time()

        if prev_avg_val_loss is not None and avg_val_loss > prev_avg_val_loss:
            model.load_state_dict(torch.load(save_path))
            model.eval()
            break
        prev_avg_val_loss = avg_val_loss
        torch.save(model.state_dict(), save_path)

In [None]:
import torch
from torch import nn

class RnnGenModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim=32):
        super().__init__()
        
        self.embeddings_layer = nn.Embedding(vocab_size, embedding_dim)
        self.lstm_layer = nn.LSTM(embedding_dim+1, embedding_dim, batch_first=True)
        self.out_layer = nn.Linear(embedding_dim, vocab_size)

    def forward(self, labels, inputs):
        padding = torch.zeros((inputs.size(0), ), device="cuda", dtype=torch.int64)
        padding = padding.unsqueeze(1)
        inputs = torch.cat((padding, inputs), 1)
        projections = self.embeddings_layer.forward(inputs)
        labels = labels.repeat(projections.size(1), 1).transpose(0, 1).unsqueeze(2).float()
        projections = torch.cat((projections, labels), 2)
        output, _= self.lstm_layer(projections)
        output = self.out_layer.forward(output)
        return output

model = RnnGenModel(len(char_set))
train_gen_model(model, train_names, train_labels, val_names, val_labels)

Trainable params: 14296
Epoch = 0, Avg Train Loss = 0.2493, Avg val loss = 1.3631, Time = 0.59s
Epoch = 1, Avg Train Loss = 0.1984, Avg val loss = 1.3254, Time = 0.58s
Epoch = 2, Avg Train Loss = 0.1915, Avg val loss = 1.2814, Time = 0.60s
Epoch = 3, Avg Train Loss = 0.1867, Avg val loss = 1.3006, Time = 0.57s


In [None]:
print(languages)

{0: 'Spanish.txt', 1: 'Dutch.txt', 2: 'Greek.txt', 3: 'Japanese.txt', 4: 'Irish.txt', 5: 'Polish.txt', 6: 'English.txt', 7: 'Czech.txt', 8: 'Italian.txt', 9: 'Scottish.txt', 10: 'Arabic.txt', 11: 'Portuguese.txt', 12: 'Vietnamese.txt', 13: 'Chinese.txt', 14: 'Korean.txt', 15: 'Russian.txt', 16: 'German.txt', 17: 'French.txt'}


In [None]:
seed = torch.zeros((1, 1), dtype=torch.int64, device="cuda")
first_letter = 'D'
seed[0][0] = char_set.index(first_letter)

lang = torch.zeros((1, ), dtype=torch.int64, device="cuda")
lang[0] = 7

name = ""
next_char = first_letter
while next_char != "<pad>":
    name += next_char
    logits = model(lang, seed)
    index = torch.max(logits[0][len(name)], dim=0)[1]
    index = index.unsqueeze(0).unsqueeze(0)
    seed = torch.cat((seed, index), 1)
    next_char = char_set[index]

print(name)

Daner
