<a href="https://www.kaggle.com/code/aleksandrmorozov123/transformers?scriptVersionId=232777589" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**The self-attention mechanism is a foundational building block of all transformer architectures**

In [1]:
import torch
from torch.nn.functional import softmax

In [2]:
# Toy input: 3 rows (one per input), each a 4-dimensional float32 vector.
x = torch.tensor(
    [
        [1, 0, 2, 0],
        [0, 3, 0, 3],
        [2, 2, 2, 2],
    ],
    dtype=torch.float32,
)
x

tensor([[1., 0., 2., 0.],
        [0., 3., 0., 3.],
        [2., 2., 2., 2.]])

In [3]:
# Fixed (hand-written) projection weights. Each matrix is 4x3, so it maps a
# 4-dimensional input row to a 3-dimensional key / query / value vector.
w_key = torch.tensor(
    [
        [0, 1, 1],
        [2, 1, 0],
        [1, 0, 1],
        [3, 3, 1],
    ],
    dtype=torch.float32,
)
w_query = torch.tensor(
    [
        [1, 0, 1],
        [2, 0, 3],
        [0, 2, 3],
        [0, 2, 2],
    ],
    dtype=torch.float32,
)
w_value = torch.tensor(
    [
        [0, 2, 1],
        [0, 2, 0],
        [1, 3, 0],
        [2, 2, 0],
    ],
    dtype=torch.float32,
)

print("Weights for key: \n", w_key)
print("Weights for query: \n", w_query)
print("Weights for value: \n", w_value)

Weights for key: 
 tensor([[0., 1., 1.],
        [2., 1., 0.],
        [1., 0., 1.],
        [3., 3., 1.]])
Weights for query: 
 tensor([[1., 0., 1.],
        [2., 0., 3.],
        [0., 2., 3.],
        [0., 2., 2.]])
Weights for value: 
 tensor([[0., 2., 1.],
        [0., 2., 0.],
        [1., 3., 0.],
        [2., 2., 0.]])


In [4]:
# Project the inputs into key, query and value space. Each projection is a plain
# matrix product of x with the matching weight matrix; no bias term is added.
keys, querys, values = (x @ weight for weight in (w_key, w_query, w_value))

print("Keys: \n", keys)
print("Querys: \n", querys)
print("Values: \n", values)

Keys: 
 tensor([[ 2.,  1.,  3.],
        [15., 12.,  3.],
        [12., 10.,  6.]])
Querys: 
 tensor([[ 1.,  4.,  7.],
        [ 6.,  6., 15.],
        [ 6.,  8., 18.]])
Values: 
 tensor([[ 2.,  8.,  1.],
        [ 6., 12.,  0.],
        [ 6., 18.,  2.]])


In [5]:
# Raw (unscaled) attention scores: entry [i, j] is the dot product of
# query i with key j.
attn_scores = torch.matmul(querys, keys.T)
print(attn_scores)

tensor([[ 27.,  84.,  94.],
        [ 63., 207., 222.],
        [ 74., 240., 260.]])


In [6]:
# Normalise the scores with a softmax over the last axis (dim = -1), i.e. along
# each row of attn_scores.
attn_scores_softmax = softmax (attn_scores, dim = -1)

print (attn_scores_softmax)

tensor([[7.9845e-30, 4.5398e-05, 9.9995e-01],
        [0.0000e+00, 3.0590e-07, 1.0000e+00],
        [0.0000e+00, 2.0612e-09, 1.0000e+00]])


In [7]:
# Hand-picked attention weights. This assignment REPLACES the softmax result
# computed in the previous cell: every later cell uses these hard-coded numbers,
# not the values derived from attn_scores.
attn_scores_softmax = torch.tensor(
    [
        [0.0, 0.5, 0.5],
        [0.0, 1.0, 0.0],
        [0.0, 0.9, 0.1],
    ]
)

print(attn_scores_softmax)

tensor([[0.0000, 0.5000, 0.5000],
        [0.0000, 1.0000, 0.0000],
        [0.0000, 0.9000, 0.1000]])


In [8]:
# Scale every value vector by every attention weight via broadcasting:
# weighted_values[j, i, :] = values[j, :] * attn_scores_softmax[i, j]
values_expanded = values.unsqueeze(1)                    # insert a size-1 axis at position 1
weights_expanded = attn_scores_softmax.T.unsqueeze(-1)   # transpose, then add a trailing size-1 axis
weighted_values = values_expanded * weights_expanded
print(weighted_values)

tensor([[[ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[ 3.0000,  6.0000,  0.0000],
         [ 6.0000, 12.0000,  0.0000],
         [ 5.4000, 10.8000,  0.0000]],

        [[ 3.0000,  9.0000,  1.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.6000,  1.8000,  0.2000]]])


In [9]:
# Sum the weighted value vectors over dim 0 (the axis that indexes the value
# rows), leaving one output row per row of attention weights.
outputs = torch.sum(weighted_values, dim=0)
print(outputs)

tensor([[ 6.0000, 15.0000,  1.0000],
        [ 6.0000, 12.0000,  0.0000],
        [ 6.0000, 12.6000,  0.2000]])


**Stable diffusion with Keras**

In [24]:
try:
  import tensorflow as tf
  print(tf.__version__)
except:
  !pip install tensorflow
  import tensorflow as tf
  print(tf.__version__)



TypeError: 'Word2Vec' object is not callable

In [26]:
!pip install keras_cv --upgrade --quiet

In [27]:
!pip install keras_core --upgrade --quiet

In [28]:
import time
import keras_cv
from tensorflow import keras
import matplotlib.pyplot as plt

In [29]:
# Build the KerasCV Stable Diffusion model, configured for 512x512 images.
model = keras_cv.models.StableDiffusion(
    img_height=512,
    img_width=512,
)

By using this model checkpoint, you acknowledge that its usage is subject to the terms of the CreativeML Open RAIL-M license at https://raw.githubusercontent.com/CompVis/stable-diffusion/main/LICENSE


In [30]:
# Generate images from a single text prompt, requesting batch_size=3.
# NOTE(review): the saved output of this cell is a TypeError raised inside
# KerasCV's CLIP encoder ("pred must not be a Python bool") - presumably a
# keras_cv / Keras version mismatch; confirm and pin compatible versions.
images = model.text_to_image("sunset in a snow world", batch_size=3)


def plot_images(images):
    """Draw every image in `images` in a single row on one 20x20-inch figure.

    Each image gets its own subplot (1 row, len(images) columns) with the
    axes hidden. Nothing is returned; the figure is left on pyplot's current
    figure for the notebook to render.
    """
    plt.figure(figsize=(20, 20))
    n_images = len(images)
    for position, image in enumerate(images, start=1):
        plt.subplot(1, n_images, position)
        plt.imshow(image)
        plt.axis("off")


# Show the generated images side by side in one row.
plot_images(images)

Downloading data from https://github.com/openai/CLIP/blob/main/clip/bpe_simple_vocab_16e6.txt.gz?raw=true
[1m1356917/1356917[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


TypeError: Exception encountered when calling CLIPEncoderLayer.call().

[1mCould not automatically infer the output shape / dtype of 'clip_encoder_layer' (of type CLIPEncoderLayer). Either the `CLIPEncoderLayer.call()` method is incorrect, or you need to implement the `CLIPEncoderLayer.compute_output_spec() / compute_output_shape()` method. Error encountered:

Exception encountered when calling CLIPAttention.call().

[1mpred must not be a Python bool[0m

Arguments received by CLIPAttention.call():
  • inputs=tf.Tensor(shape=(None, 77, 768), dtype=float32)
  • attention_mask=None[0m

Arguments received by CLIPEncoderLayer.call():
  • args=('<KerasTensor shape=(None, 77, 768), dtype=float32, sparse=False, name=keras_tensor_1>',)
  • kwargs=<class 'inspect._empty'>