<a href="https://colab.research.google.com/github/Negfir/Bayesian-GA/blob/main/PGM_Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

In [22]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import math 
from pomegranate import BayesianNetwork
import time
# Seed NumPy's global RNG so NumPy-based random draws in later cells are repeatable.
np.random.seed(1234)
# Report how many GPUs TensorFlow can see in this runtime.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Log the device every op is placed on; this is the source of the
# "Executing op ... in device ..." lines in the outputs of later cells.
tf.debugging.set_log_device_placement(True)

# Seed TensorFlow's global random generator.
tf.random.set_seed(
    7
)


Num GPUs Available:  1


In [1]:
!pip install pomegranate
# !pip uninstall numpy
# !pip install numpy


Collecting pomegranate
[?25l  Downloading https://files.pythonhosted.org/packages/a7/65/98bbcc034daac7b584e82a0f91f140dbb9ef502a1b7c96f16f3deb847232/pomegranate-0.14.4-cp37-cp37m-manylinux2010_x86_64.whl (17.9MB)
[K     |████████████████████████████████| 17.9MB 1.2MB/s 
Collecting numpy>=1.20.0
[?25l  Downloading https://files.pythonhosted.org/packages/73/ef/8967d406f3f85018ceb5efab50431e901683188f1741ceb053efcab26c87/numpy-1.20.2-cp37-cp37m-manylinux2010_x86_64.whl (15.3MB)
[K     |████████████████████████████████| 15.3MB 356kB/s 
[31mERROR: tensorflow 2.4.1 has requirement numpy~=1.19.2, but you'll have numpy 1.20.2 which is incompatible.[0m
[31mERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.[0m
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.[0m
Installing collected packages: numpy, pomegranate
  Found existing installation: numpy 1.19.5
  

## Implement a Transformer block as a layer

In [2]:

class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual addition
    and layer normalization.

    Args:
        embed_dim: Passed as ``key_dim`` to the attention layer and used as the
            output width of the feed-forward network.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) layer of the feed-forward network.
        rate: Dropout rate applied after both sub-layers.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        # Two-layer position-wise network: expand to ff_dim, project back to embed_dim.
        hidden_layer = layers.Dense(ff_dim, activation="relu")
        projection_layer = layers.Dense(embed_dim)
        self.ffn = keras.Sequential([hidden_layer, projection_layer])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        """Apply the block to ``inputs``; ``training`` toggles the dropout layers."""
        # Self-attention: the inputs serve as both query and value.
        attended = self.dropout1(self.att(inputs, inputs), training=training)
        residual = self.layernorm1(inputs + attended)
        # Feed-forward sub-layer with its own residual connection.
        transformed = self.dropout2(self.ffn(residual), training=training)
        return self.layernorm2(residual + transformed)


## Implement embedding layer

Two separate embedding layers: one for the tokens and one for the token indices (positions).

In [3]:

class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: Number of distinct positions the position embedding can hold.
        vocab_size: Number of distinct token ids the token embedding can hold.
        embed_dim: Output width of both embeddings.
    """

    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position index."""
        # Positions 0..L-1, where L is the size of the last axis of x.
        seq_len = tf.shape(x)[-1]
        position_vectors = self.pos_emb(tf.range(start=0, limit=seq_len, delta=1))
        token_vectors = self.token_emb(x)
        return token_vectors + position_vectors


## Download and prepare dataset

In [4]:
vocab_size = 20000  # Only consider the top 20k words
maxlen = 200  # Every review is padded/truncated to exactly 200 tokens
# The second tuple returned by load_data is used as the validation split here.
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=vocab_size)
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")
# NOTE(review): pad_sequences is called with its defaults; per the Keras docs
# these are padding='pre' and truncating='pre', i.e. longer reviews keep their
# LAST `maxlen` tokens rather than the first ones -- confirm this is intended.
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


25000 Training sequences
25000 Validation sequences


## Create classifier model using transformer layer

Transformer layer outputs one vector for each time step of our input sequence.
Here, we take the mean across all time steps and
use a feed forward network on top of it to classify text.

In [None]:

# NOTE: the training cell further down repeats this exact model construction,
# so the `model` built here is replaced when that cell runs.
embed_dim = 32  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer

# Token ids -> token+position embeddings -> one Transformer block.
inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
# Average over the time axis, then classify with a small dense head.
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
# Two-way softmax output (one unit per class).
outputs = layers.Dense(2, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)


Executing op RandomUniform in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Add in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RandomUniform in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Add in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:

## Train and Evaluate

In [23]:
# Scalar hyperparameters for the baseline classifier.
embed_dim = 32  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer

# Token ids -> token+position embeddings -> one Transformer block.
inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
# Average over the time axis, then classify with a small dense head.
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
outputs = layers.Dense(2, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)


# Positional arguments are (optimizer, loss); the sparse loss takes integer
# class labels against the 2-unit softmax output.
model.compile("adam", "sparse_categorical_crossentropy", metrics=["accuracy"])
batch = 32  # also reused as the batch size of the evaluation cell
import time

# Wall-clock timing of the whole fit call.
start = time.time()

# No validation data is passed, so only training metrics are recorded in `history`.
history = model.fit(
    x_train, y_train, batch_size=batch, epochs=2
)

end = time.time()
print(end - start)

Executing op RandomUniform in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Add in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RandomUniform in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Add in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:

# **Optimization**

In [24]:
def fitnessFunc(chromosome):
    """Sphere benchmark (F1): the sum of the squares of all genes.

    The function is unimodal, symmetric and separable.  An empty
    chromosome scores 0.
    """
    squared_genes = (chromosome[gene] ** 2 for gene in range(len(chromosome)))
    return sum(squared_genes, 0)

In [38]:
from numpy import random
class EncoderDecoder:
  """Two-way lookup between decimal rows and their binary-string rows.

  Row ``i`` of ``decimal_rep`` is paired with row ``i`` of ``binary_rep``.
  A row that is not found in the source table is mapped to a uniformly
  random row of the target table (drawn from NumPy's global RNG), so
  ``encode``/``decode`` always return a known row.

  Args:
    decimal_rep: Array-like table of decimal rows.
    binary_rep: Array-like table of binary rows, aligned with ``decimal_rep``.
  """

  def __init__(self, decimal_rep, binary_rep):
    self.decimal_rep = np.array(decimal_rep)
    self.binary_rep = np.array(binary_rep)

  @staticmethod
  def _translate(row, source, target):
    """Return the ``target`` row aligned with ``row`` in ``source``.

    Falls back to a random ``target`` row when ``row`` is not in ``source``.
    """
    row = np.asarray(row)
    for i, candidate in enumerate(source):
      if np.array_equal(candidate, row):
        return target[i]
    # np.random is referenced explicitly so the fallback does not depend on
    # what the bare name `random` is bound to in the notebook namespace.
    return target[np.random.randint(len(target))]

  def encode(self, row):
    """Map a decimal row to its binary row (random binary row if unknown)."""
    return self._translate(row, self.decimal_rep, self.binary_rep)

  def decode(self, row):
    """Map a binary row to its decimal row (random decimal row if unknown)."""
    return self._translate(row, self.binary_rep, self.decimal_rep)

  def getDecimal(self):
    """Return the table of decimal rows."""
    return self.decimal_rep

  def getBinary(self):
    """Return the table of binary rows."""
    return self.binary_rep

  def setDecimal(self, inp):
    """Replace the decimal table; stored as an ndarray, like in ``__init__``."""
    self.decimal_rep = np.asarray(inp)

  def setBinary(self, inp):
    """Replace the binary table; stored as an ndarray, like in ``__init__``."""
    self.binary_rep = np.asarray(inp)




In [39]:
# Candidate values (eight per hyperparameter) for the Transformer search space.
# NOTE: this rebinds embed_dim / num_heads / ff_dim, which the model-building
# cells assign as scalars; those cells set the scalars again before using them.
embed_dim = [8, 16, 32, 64, 128, 256, 512, 768]
num_heads = [2, 3, 4, 5, 6, 8, 10, 12]
ff_dim = [4, 8, 16, 32, 64, 128, 256, 512]

def bitEncoder(var):
  """Return one fixed-width binary string per position of ``var``.

  Position ``i`` is encoded as the binary representation of ``i`` itself
  (the values stored in ``var`` are not used), padded to
  ``ceil(log2(len(var)))`` bits.  An empty ``var`` yields an empty list.
  """
  count = len(var)
  if count == 0:
    return []
  width = math.ceil(np.log2(count))
  return [np.binary_repr(index, width=width) for index in range(count)]

def bitDecoder(var):
  """Print the base-10 string of every position index of ``var``.

  Only the length of ``var`` matters; its values are not read.  The list is
  printed, not returned (the function returns None).
  """
  count = len(var)
  positions = [np.base_repr(index, 10) for index in range(count)]
  print(positions)

# Display the bit codes generated for the eight embed_dim candidates.
bitEncoder(embed_dim)

['000', '001', '010', '011', '100', '101', '110', '111']

In [69]:



def sample(chromosome, sample_size):
    """Draw a random initial population and attach each row's fitness.

    ``sample_size`` row indices are drawn independently with
    ``np.random.randint`` (so a row may be picked more than once).  Every
    picked row is returned with its ``fitnessFunc`` value appended as the
    last column.
    """
    picks = np.random.randint(low=0, high=chromosome.shape[0], size=sample_size)
    scored_rows = [
        list(np.append(chromosome[pick], fitnessFunc(chromosome[pick])))
        for pick in picks
    ]
    return np.asarray(scored_rows)


def getFitness(chromosome):
    """Return ``chromosome`` with each row's fitness appended as the last column.

    Rows keep their original order; the fitness comes from ``fitnessFunc``.
    """
    scored_rows = [
        list(np.append(chromosome[row], fitnessFunc(chromosome[row])))
        for row in range(len(chromosome))
    ]
    return np.asarray(scored_rows)

In [11]:
!apt install libgraphviz-dev
!pip install pygraphviz

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following additional packages will be installed:
  libgail-common libgail18 libgtk2.0-0 libgtk2.0-bin libgtk2.0-common
  libgvc6-plugins-gtk libxdot4
Suggested packages:
  gvfs
The following NEW packages will be installed:
  libgail-common libgail18 libgraphviz-dev libgtk2.0-0 libgtk2.0-bin
  libgtk2.0-common libgvc6-plugins-gtk libxdot4
0 upgraded, 8 newly installed, 0 to remove and 31 not upgraded.
Need to get 2,120 kB of archives.
After this operation, 7,128 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgtk2.0-common all 2.24.32-1ubuntu1 [125 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgtk2.0-0 amd64 2.24.32-1ubuntu1 [1,769 kB]
Get:3 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgail18 amd64 2.24.32-1ubuntu1 [14.2 kB]
Get:4 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgail-common amd64 2.24.32

In [41]:
def sortSelect(population, percentage):
    """Keep the best fraction of a scored population.

    The last column of ``population`` holds the fitness.  Rows are sorted by
    it in ascending order (lower is better), the fitness column is dropped,
    and the first ``floor(n_rows * percentage)`` rows are returned.
    """
    fitness_col = population.shape[1] - 1
    keep = math.floor(population.shape[0] * percentage)
    ranked = pd.DataFrame(population).sort_values(by=[fitness_col])
    genes_only = ranked.drop(columns=[fitness_col]).values
    return genes_only[:keep]

def binaryEncoder(chrom, encoder=None):
    """Turn each chromosome into a flat list of single-character bits.

    Every row of ``chrom`` is passed to ``encoder.encode``; the strings it
    returns are split into characters and concatenated in order.

    Args:
        chrom: Iterable of chromosome rows.
        encoder: Object exposing ``encode(row)``.  When omitted, the
            notebook-level ``data`` object is used (looked up at call time),
            which keeps existing ``binaryEncoder(chrom)`` calls working.

    Returns:
        A list with one list of one-character strings per chromosome.
    """
    if encoder is None:
        encoder = data  # notebook global, resolved only when no encoder is given
    pop = []
    for row in chrom:
        # Build the bit list directly instead of repeatedly np.append-ing to an
        # empty (float) list, which depends on float->string promotion.
        bits = [bit for item in encoder.encode(row) for bit in str(item)]
        pop.append(bits)
    return pop


def seperator(stringIn,NUM_VARIABLES,VAR_SIZE):
  """Split a flat sequence of bit characters into per-variable bit strings.

  Returns a list of ``NUM_VARIABLES`` strings; string ``k`` is the join of
  the ``VAR_SIZE`` characters starting at offset ``VAR_SIZE * k``.
  """
  return [
    ''.join(stringIn[VAR_SIZE * k:VAR_SIZE * (k + 1)])
    for k in range(NUM_VARIABLES)
  ]


[[ 0  0 -2]
 [ 1 -1  2]
 [ 0  2  2]
 [ 2  1 -2]
 [-2  4  0]
 [-2 -4 -1]
 [-1  4  2]
 [ 1  2 -4]
 [ 3  2 -3]
 [ 2  2  4]
 [ 1  4 -3]
 [-1  5 -1]
 [-4 -3  2]
 [ 2  5  1]
 [-5 -1  2]]


In [95]:

from pomegranate import BayesianNetwork



# print("Greedy")
# print("Time (s): ", t)
# print("P(D|M): ", p)
# model.plot()

# --- Transformer hyperparameter candidates --------------------------------
embed_dim = [8, 16, 32, 64, 128, 256, 512, 768]
num_heads = [2, 3, 4, 5, 6, 8, 10, 12]
ff_dim = [4, 8, 16, 32, 64, 128, 256, 512]

# All (embed_dim, num_heads, ff_dim) combinations.  Not used by the
# sphere-benchmark run below; it has its own name so the benchmark grid
# stored in `variables` does not overwrite it.
hyperparam_grid = np.array(
  np.meshgrid(embed_dim, num_heads, ff_dim)).T.reshape(-1, 3)

# --- Run configuration ----------------------------------------------------
NUM_VARIABLES = 3        # genes per chromosome
VAR_SIZE = 4             # bits per gene: bitEncoder pads 11 values to 4 bits
POPULATION_SIZE = 60     # rows drawn for the initial population
SELECTION_RATIO = 0.5    # fraction of the population kept by sortSelect
NUM_GENERATIONS = 5
NUM_OFFSPRING = 20       # samples drawn from the Bayesian network per generation

# --- Sphere-benchmark search space: three integer genes in [-5, 5] ---------
var1 = np.array([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])
var2 = np.array([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])
var3 = np.array([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])

var1_bin = bitEncoder(var1)
var2_bin = bitEncoder(var2)
var3_bin = bitEncoder(var3)

# Row i of `variables` and row i of `variables_bin` describe the same point:
# both grids are built with the same meshgrid/transpose/reshape recipe.
variables = np.array(
  np.meshgrid(var1, var2, var3)).T.reshape(-1, 3)
variables_bin = np.array(
  np.meshgrid(var1_bin, var2_bin, var3_bin)).T.reshape(-1, 3)

data = EncoderDecoder(variables,variables_bin)

# Sanity check of the codec: encode one point, round-trip another.
print(data.encode([0,0,0]))
print(data.decode(data.encode([0,5,2])))

####################################################
new_pop = variables

# Initial population: random rows with fitness, then keep the best fraction.
population = sample(new_pop, POPULATION_SIZE)
chrom = sortSelect(population, SELECTION_RATIO)

for generation in range(NUM_GENERATIONS):
    X = binaryEncoder(chrom)

    # Learn a Bayesian network over the bits of the selected chromosomes.
    # It is named `bayes_net` (not `model`) so that the Keras `model` used by
    # the evaluation cell is not overwritten by this loop.
    tic = time.time()  # start of structure learning; `t` below is its duration
    bayes_net = BayesianNetwork.from_samples(X, algorithm='greedy') # << Default BNSL setting
    t = time.time() - tic
    # Summed log-probability of the training rows under the learned network.
    p = bayes_net.log_probability(X).sum()

    # Offspring: sample bit vectors from the network and decode them to genes.
    sampled = bayes_net.sample(NUM_OFFSPRING)
    child = [
        list(data.decode(seperator(c, NUM_VARIABLES, VAR_SIZE)))
        for c in sampled
    ]

    # Merge offspring with the current parents and select the next generation.
    new_pop = np.concatenate((np.array(child), chrom), axis=0)
    chrom = sortSelect(getFitness(new_pop), SELECTION_RATIO)
print(chrom)

['0101' '0101' '0101']
[0 5 2]
[[ 0  0  0]
 [-1  0  0]
 [-1  0  0]
 [-1  0  0]
 [-1  0  0]
 [-1  0  0]
 [-1  0  0]
 [-1  0  0]
 [ 1  0  0]
 [-1  0  0]
 [ 1  1  0]
 [-1  0 -1]
 [ 0 -1 -1]
 [-1  0 -1]
 [-1  0  1]
 [ 0 -1 -1]
 [ 0  1  1]
 [-1  0 -1]
 [ 1  0 -1]
 [-1  0 -1]]


In [None]:
import time

# Wall-clock timing of one evaluation pass over the held-out split.
# `model` must be the Keras classifier and `batch` the batch size; both are
# defined by the training cell.
start = time.time()
print("Evaluate on test data")
# With the compile settings of the training cell, `results` holds the loss
# followed by the accuracy metric.
results = model.evaluate(x_val, y_val, batch_size=batch)
print("test loss, test acc:", results)
end = time.time()
print(end - start)

Evaluate on test data
Executing op ReadVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Identity in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op RangeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op TensorDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ZipDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ParallelMapDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ReadVariableOp in device /job:localhost/replica:0/task:0/device: