# Прохоренко Олександр ФІ-21мн
## Lab 4: Advanced Neural Nets

## 1. Генерація зображень (GAN)
### Fashion MNIST
### https://www.kaggle.com/datasets/zalando-research/fashionmnist

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

train_data = pd.read_csv('./fshnmnst/fashion-mnist_train.csv')
train_data.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [2]:
train_data.drop('label', axis=1, inplace=True)
print('input shape: ', train_data.shape[1])
train_data = train_data.to_numpy()
print(train_data.shape[0], 'train samples')
train_data = (train_data.astype(np.float32) - 127.5) / 127.5

input shape:  784
60000 train samples


In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import initializers
from keras import preprocessing
from keras.models import Sequential
from keras.layers import Dropout, Dense, LeakyReLU
from tensorflow.keras import layers

In [4]:
def build_discriminator():
    model = Sequential()
    model.add(Dense(1024, input_dim=784, kernel_initializer=initializers.RandomNormal(stddev=0.02)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.3))
        
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.3))
    
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.3))
        
    model.add(Dense(1, activation='sigmoid'))
    return model

In [5]:
latent_dim = 100

def build_generator():
    model = Sequential()
    model.add(Dense(256, input_dim=latent_dim, kernel_initializer=initializers.RandomNormal(stddev=0.02)))
    model.add(LeakyReLU(alpha=0.2))
    
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    
    model.add(Dense(1024))
    model.add(LeakyReLU(alpha=0.2))
    
    model.add(Dense(784, activation='tanh'))
    return model  

In [6]:
def plot_generated_images(epoch, generator):
    num_of_images = 10
    print('epoch: ', epoch)
    noise = np.random.normal(0, 1, size=[num_of_images, latent_dim])
    generated_images=generator.predict(noise, verbose=0).reshape(num_of_images, 28, 28)
    plt.figure(figsize=(20,2))
    for i in range(num_of_images):
        plt.subplot(1, 10, i+1)
        plt.imshow(generated_images[i], cmap = 'gray')
        plt.axis('off')
        plt.tight_layout()
    plt.show()

In [7]:
discriminator = build_discriminator()
generator = build_generator()
discriminator.trainable = False
gan_input = keras.Input(shape=(latent_dim,))
x = generator(gan_input)
gan_output = discriminator(x)
gan = keras.Model(inputs=gan_input, outputs=gan_output)
gan.compile(loss='binary_crossentropy', optimizer='adam')

In [8]:
discrim_losses = []

def train_model(epochs, batch_size):
    num_batches = int(train_data.shape[0]/batch_size)
    for ep in range(epochs+1):
        for i in range(num_batches):
            noise = np.random.normal(0, 1, size=[batch_size, latent_dim])
            image_batch = train_data[np.random.randint(0, train_data.shape[0], size=batch_size)]
            
            generated_images = generator.predict(noise, verbose=0)
            X = np.concatenate([image_batch, generated_images])

            y_dis = np.zeros(2*batch_size)
            y_dis[:batch_size] = 0.9

            discriminator.trainable = True
            d_loss = discriminator.train_on_batch(X, y_dis)
            discrim_losses.append(d_loss)

            noise = np.random.normal(0, 1, size=[batch_size, latent_dim])
            y_gen = np.ones(batch_size)
            discriminator.trainable = False
            gan.train_on_batch(noise, y_gen)
        if ep % 10 == 0:
            plot_generated_images(ep, generator)

In [None]:
epochs = 100
batch_size = 128
train_model(epochs, batch_size)

## 2.  Вирішити завдання машинного перекладу
### Ukrainian - English
### https://www.manythings.org/anki/

In [9]:
with open("./ukr-eng/ukr.txt") as f:
    lines = f.read().split("\n")[:-1]
text_pairs = []
for line in lines:
    eng, ukr, trash = line.split("\t")
    ukr = "[start] " + ukr + " [end]"
    text_pairs.append((eng, ukr))

In [10]:
import random
for _ in range(5):
    print(random.choice(text_pairs))

("They didn't see anything.", '[start] Вони нічого не бачили. [end]')
('Tom may be the traitor.', '[start] Можливо, зрадник — Том. [end]')
('He wants to live as long as he can.', '[start] Він хоче жити якомога довше. [end]')
('What do you like about it?', '[start] Що тобі в цьому подобається? [end]')
("There's nothing to be proud of.", '[start] Немає чим пишатися. [end]')


In [11]:
random.shuffle(text_pairs)
num_val_samples = int(0.15 * len(text_pairs))
num_train_samples = len(text_pairs) - 2 * num_val_samples
train_pairs = text_pairs[:num_train_samples]
val_pairs = text_pairs[num_train_samples : num_train_samples + num_val_samples]
test_pairs = text_pairs[num_train_samples + num_val_samples :]

print(f"{len(text_pairs)} total pairs")
print(f"{len(train_pairs)} training pairs")
print(f"{len(val_pairs)} validation pairs")
print(f"{len(test_pairs)} test pairs")

156173 total pairs
109323 training pairs
23425 validation pairs
23425 test pairs


In [12]:
import string
from keras.layers import TextVectorization
import re

strip_chars = string.punctuation
strip_chars = strip_chars.replace("[", "")
strip_chars = strip_chars.replace("]", "")

vocab_size = 15000
sequence_length = 20
batch_size = 64


def custom_standardization(input_string):
    lowercase = tf.strings.lower(input_string)
    return tf.strings.regex_replace(lowercase, "[%s]" % re.escape(strip_chars), "")


eng_vectorization = TextVectorization(
    max_tokens=vocab_size, output_mode="int", output_sequence_length=sequence_length,
)
ukr_vectorization = TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length + 1,
    standardize=custom_standardization,
)
train_eng_texts = [pair[0] for pair in train_pairs]
train_ukr_texts = [pair[1] for pair in train_pairs]
eng_vectorization.adapt(train_eng_texts)
ukr_vectorization.adapt(train_ukr_texts)

In [13]:
def format_dataset(eng, ukr):
    eng = eng_vectorization(eng)
    ukr = ukr_vectorization(ukr)
    return ({"encoder_inputs": eng, "decoder_inputs": ukr[:, :-1],}, ukr[:, 1:])


def make_dataset(pairs):
    eng_texts, ukr_texts = zip(*pairs)
    eng_texts = list(eng_texts)
    ukr_texts = list(ukr_texts)
    dataset = tf.data.Dataset.from_tensor_slices((eng_texts, ukr_texts))
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(format_dataset)
    return dataset.shuffle(2048).prefetch(16).cache()


train_ds = make_dataset(train_pairs)
val_ds = make_dataset(val_pairs)

In [14]:
for inputs, targets in train_ds.take(1):
    print(f'inputs["encoder_inputs"].shape: {inputs["encoder_inputs"].shape}')
    print(f'inputs["decoder_inputs"].shape: {inputs["decoder_inputs"].shape}')
    print(f"targets.shape: {targets.shape}")

inputs["encoder_inputs"].shape: (64, 20)
inputs["decoder_inputs"].shape: (64, 20)
targets.shape: (64, 20)


In [15]:
class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.supports_masking = True

    def call(self, inputs, mask=None):
        if mask is not None:
            padding_mask = tf.cast(mask[:, tf.newaxis, tf.newaxis, :], dtype="int32")
        attention_output = self.attention(
            query=inputs, value=inputs, key=inputs, attention_mask=padding_mask
        )
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)


class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions

    def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)


class TransformerDecoder(layers.Layer):
    def __init__(self, embed_dim, latent_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.latent_dim = latent_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.dense_proj = keras.Sequential(
            [layers.Dense(latent_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True

    def call(self, inputs, encoder_outputs, mask=None):
        causal_mask = self.get_causal_attention_mask(inputs)
        if mask is not None:
            padding_mask = tf.cast(mask[:, tf.newaxis, :], dtype="int32")
            padding_mask = tf.minimum(padding_mask, causal_mask)

        attention_output_1 = self.attention_1(
            query=inputs, value=inputs, key=inputs, attention_mask=causal_mask
        )
        out_1 = self.layernorm_1(inputs + attention_output_1)

        attention_output_2 = self.attention_2(
            query=out_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,
        )
        out_2 = self.layernorm_2(out_1 + attention_output_2)

        proj_output = self.dense_proj(out_2)
        return self.layernorm_3(out_2 + proj_output)

    def get_causal_attention_mask(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i = tf.range(sequence_length)[:, tf.newaxis]
        j = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
        mult = tf.concat(
            [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
            axis=0,
        )
        return tf.tile(mask, mult)

In [16]:
embed_dim = 256
latent_dim = 2048
num_heads = 8

encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="encoder_inputs")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
encoder_outputs = TransformerEncoder(embed_dim, latent_dim, num_heads)(x)
encoder = keras.Model(encoder_inputs, encoder_outputs)

decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="decoder_inputs")
encoded_seq_inputs = keras.Input(shape=(None, embed_dim), name="decoder_state_inputs")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
x = TransformerDecoder(embed_dim, latent_dim, num_heads)(x, encoded_seq_inputs)
x = layers.Dropout(0.5)(x)
decoder_outputs = layers.Dense(vocab_size, activation="softmax")(x)
decoder = keras.Model([decoder_inputs, encoded_seq_inputs], decoder_outputs)

decoder_outputs = decoder([decoder_inputs, encoder_outputs])
transformer = keras.Model(
    [encoder_inputs, decoder_inputs], decoder_outputs, name="transformer"
)

In [17]:
epochs = 4 # This should be at least 30 for convergence

transformer.summary()
transformer.compile(
    "rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
transformer.fit(train_ds, epochs=epochs, validation_data=val_ds)

Model: "transformer"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_inputs (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 positional_embedding (Position  (None, None, 256)   3845120     ['encoder_inputs[0][0]']         
 alEmbedding)                                                                                     
                                                                                                  
 decoder_inputs (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 transformer_encoder (Transform  (None, None, 256)   3155456     ['positional_embedding[

<keras.callbacks.History at 0x7fad34648ee0>

## Ось такий проміжний результат ми маємо, коли пройшли 4 епохи

In [22]:
ukr_vocab = ukr_vectorization.get_vocabulary()
ukr_index_lookup = dict(zip(range(len(ukr_vocab)), ukr_vocab))
max_decoded_sentence_length = 20


def decode_sequence(input_sentence):
    tokenized_input_sentence = eng_vectorization([input_sentence])
    decoded_sentence = "[start]"
    for i in range(max_decoded_sentence_length):
        tokenized_target_sentence = ukr_vectorization([decoded_sentence])[:, :-1]
        predictions = transformer([tokenized_input_sentence, tokenized_target_sentence])

        sampled_token_index = np.argmax(predictions[0, i, :])
        sampled_token = ukr_index_lookup[sampled_token_index]
        decoded_sentence += " " + sampled_token

        if sampled_token == "[end]":
            break
    return decoded_sentence


test_eng_texts = [pair[0] for pair in test_pairs]
for _ in range(30):
    input_sentence = random.choice(test_eng_texts)
    translated = decode_sequence(input_sentence)
    print(input_sentence," ", translated)

Why are you so busy?   [start] Чому ти такий зайнятий [end]
I wonder whether Tom has left or not.   [start] Мені цікаво чи Том колись пішов [end]
Everyone makes that mistake.   [start] Всі це [UNK] [end]
Are you tired now?   [start] Ти зараз втомився [end]
I was expecting a lot more.   [start] Я дуже багато чого знаю [end]
Does your father smoke?   [start] Мій батько [UNK] [end]
Tom knows who Mary's boyfriend is.   [start] Том знає хто [UNK] Мері [end]
We're cutting our costs.   [start] Ми [UNK] свою [UNK] [end]
She accompanied him to Japan.   [start] Вона [UNK] його до школи [end]
It's perfect.   [start] Це [UNK] [end]
I helped everybody.   [start] Я допоміг всі [end]
How do you use this camera?   [start] Як ти [UNK] свій [UNK] [end]
You're wealthy.   [start] Ти такий задоволений [end]
I'm hard working.   [start] Я дуже працює [end]
Does Tom have kids?   [start] Том має дітей [end]
Do you like my new shoes?   [start] Ти любиш свою новий окуляри [end]
What do you usually do after suppe

## 3. Експерименти з моделями бібліотеки HF Transformers (https://huggingface.co/) за допомогою Pipeline 

In [1]:
from transformers import pipeline
classifier = pipeline('sentiment-analysis')

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [12]:
print('My dog ate my homework ',classifier('My dog ate my homework'))
print('I love icecream ', classifier('I love icecream'))
print('I love icecream but not this one ', classifier('I love icecream but not this one'))
print('I love icecream but this particular one much more', classifier('I love icecream but this particular one much more'))
print('I love icecream but this particular one not so much', classifier('I love icecream but this particular one not so much'))
print('I don`t love icecream but this one is allright', classifier('I don`t love icecream but this one is allright'))

My dog ate my homework  [{'label': 'NEGATIVE', 'score': 0.9568425416946411}]
I love icecream  [{'label': 'POSITIVE', 'score': 0.9992756247520447}]
I love icecream but not this one  [{'label': 'NEGATIVE', 'score': 0.9574670195579529}]
I love icecream but this particular one much more [{'label': 'POSITIVE', 'score': 0.9903526902198792}]
I love icecream but this particular one not so much [{'label': 'NEGATIVE', 'score': 0.9980582594871521}]
I don`t love icecream but this one is allright [{'label': 'POSITIVE', 'score': 0.7439035773277283}]


In [1]:
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")

In [8]:
article_ru = "Прекрасные пирожки со вкусом клубники продаются в магазине у вокзала"
article_ua = "Чудові пиріжки зі смаком полуниці продаються у крамниці біля залізниці"
article_ro = "La magazinul din gara se vând plăcinte minunate cu aromă de căpșuni"
article_pl = "W sklepie w pobliżu stacj sprzedawane są pyszne paszteciki o smaku truskawkowym"

tokenizer.src_lang = "ru_RU"
encoded_ru = tokenizer(article_ru, return_tensors="pt")
generated_tokens = model.generate(**encoded_ru)
print(article_ru," ",tokenizer.batch_decode(generated_tokens, skip_special_tokens=True))

tokenizer.src_lang = "uk_UA"
encoded_ua = tokenizer(article_ua, return_tensors="pt")
generated_tokens = model.generate(**encoded_ua)
print(article_ua," ",tokenizer.batch_decode(generated_tokens, skip_special_tokens=True))

tokenizer.src_lang = "ro_RO"
encoded_ro = tokenizer(article_ro, return_tensors="pt")
generated_tokens = model.generate(**encoded_ro)
print(article_ro," ",tokenizer.batch_decode(generated_tokens, skip_special_tokens=True))

tokenizer.src_lang = "pl_PL"
encoded_pl = tokenizer(article_pl, return_tensors="pt")
generated_tokens = model.generate(**encoded_pl)
print(article_pl," ",tokenizer.batch_decode(generated_tokens, skip_special_tokens=True))

Прекрасные пирожки со вкусом клубники продаются в магазине у вокзала   ['Delicious strawberry-flavored cakes are sold at the train station shop']
Чудові пиріжки зі смаком полуниці продаються у крамниці біля залізниці   ["There's a great candy-flavored sausage that's sold in a railroad store."]
La magazinul din gara se vând plăcinte minunate cu aromă de căpșuni   ['Gorgeous chestnut-flavoured pies on sale at garage shop']
W sklepie w pobliżu stacj sprzedawane są pyszne paszteciki o smaku truskawkowym   ['Delicious strawberry-flavored patties are sold in a shop near the station']
