In [1]:
import copy
import warnings

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Dropout, Layer
from tensorflow.keras.layers import Embedding, Input, GlobalAveragePooling1D, Dense
from tensorflow.keras.models import Sequential, Model

warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

class Sigmoid:
    """Logistic sigmoid activation with a hand-written backward pass.

    Nothing is returned by either method; results are stored on the instance:
    ``forward`` sets ``input`` and ``output``, ``backward`` sets ``b_output``.
    """

    def __init__(self):
        """Create the activation; no attributes exist until ``forward`` runs."""

    def forward(self, inputs):
        """Cache ``inputs`` and store ``1 / (1 + exp(-inputs))`` in ``self.output``."""
        self.input = inputs
        denominator = 1 + np.exp(-inputs)
        self.output = 1 / denominator

    def backward(self, b_input):
        """Store ``b_input * output * (1 - output)`` in ``self.b_output``.

        Relies on ``self.output`` from the most recent ``forward`` call.
        """
        scaled_by_output = b_input * self.output
        self.b_output = scaled_by_output * (1 - self.output)

class ReLU:
    """Rectified linear activation with a hand-written backward pass.

    Nothing is returned by either method; results are stored on the instance:
    ``forward`` sets ``output`` and ``input``, ``backward`` sets ``b_output``.
    """

    def __init__(self):
        """Create the activation; no attributes exist until ``forward`` runs."""

    def forward(self, inputs):
        """Store ``max(0, inputs)`` in ``self.output`` and cache ``inputs``."""
        self.output = np.maximum(0, inputs)
        self.input = inputs

    def backward(self, b_input):
        """Store the incoming gradient, zeroed wherever the cached input was <= 0.

        The result is a new array. The previous implementation aliased
        ``b_input`` and zeroed entries in place, silently modifying the
        caller's gradient array.
        """
        self.b_output = np.where(np.asarray(self.input) <= 0, 0, b_input)

In [2]:
class TransformerBlock(Layer):
    """Self-attention block followed by a configurable feed-forward stack.

    Args:
        d_model: Size of the last input axis; used as the attention ``key_dim``
            and as the width of the final feed-forward ``Dense`` layer so the
            residual additions line up.
        num_heads: Number of attention heads.
        af: Activation names, one per hidden feed-forward layer.
        neuron_n: Unit counts, one per hidden feed-forward layer. An empty
            sequence yields a single ``Dense(d_model)`` projection.
        dropout_rate: Dropout rates, one per hidden feed-forward layer.
        norm_existance: Two flags ``[after_attention, after_feedforward]``;
            a value of ``1`` enables layer normalisation at that position.
    """

    def __init__(self, d_model, num_heads,af, neuron_n, dropout_rate,norm_existance):
        super().__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        # One normalisation layer per residual branch so that the two positions
        # do not share scale/offset weights.
        self.layernorm = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)

        # Keep this block's own flags. ``call`` previously read a module-level
        # name ``norm_existance`` (the search-space list), so every block used
        # the same setting regardless of its chromosome.
        self.norm_after_attention = norm_existance[0] == 1
        self.norm_after_feedforward = norm_existance[1] == 1

        # Hidden Dense+Dropout pairs followed by the projection back to d_model.
        # ``zip`` stops at the shortest of the three sequences, so surplus
        # entries in any one of them are ignored.
        ff_layers = []
        for units, activation, rate in zip(neuron_n, af, dropout_rate):
            ff_layers.append(Dense(units, activation=activation))
            ff_layers.append(Dropout(rate))
        ff_layers.append(Dense(d_model))
        self.feedforward = Sequential(ff_layers)

    def call(self, inputs, training=None):
        """Apply attention and feed-forward sub-layers with residual connections.

        Args:
            inputs: Tensor used as both query and value for self-attention.
            training: Forwarded to the feed-forward stack so its ``Dropout``
                layers follow the caller's training/inference mode.

        Returns:
            Tensor produced by the second residual connection (optionally
            normalised).
        """
        attn_output = self.att(inputs, inputs)
        out1 = inputs + attn_output
        if self.norm_after_attention:
            out1 = self.layernorm(out1)

        feedforward_output = self.feedforward(out1, training=training)
        result = out1 + feedforward_output
        if self.norm_after_feedforward:
            result = self.layernorm2(result)
        return result

In [3]:
class TokenAndPositionEmbedding(Layer):
    """Adds a learned position embedding to a learned token embedding.

    Args:
        maxlen: Number of distinct positions the position table can hold.
        vocab_size: Number of distinct token ids the token table can hold.
        d_model: Width of both embedding tables.
    """

    def __init__(self, maxlen, vocab_size, d_model):
        super().__init__()
        self.token_emb = Embedding(input_dim=vocab_size, output_dim=d_model)
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=d_model)

    def call(self, x):
        """Return ``token_emb(x) + pos_emb(0 .. len-1)``.

        The sequence length is read from the last axis of ``x`` at run time.
        """
        seq_len = tf.shape(x)[-1]
        position_vectors = self.pos_emb(tf.range(start=0, limit=seq_len, delta=1))
        token_vectors = self.token_emb(x)
        return token_vectors + position_vectors

In [4]:
# Dataset configuration shared by the rest of the notebook.
vocab_size = 20_000  # Word ids at or above this limit are not kept by load_data
maxlen = 200  # Target sequence length used when the reviews are padded below

# IMDB reviews as lists of word ids plus their labels, split into train/test.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

print(len(x_train), "Training sequences")
print(len(x_test), "Test sequences")

25000 Training sequences
25000 Test sequences


In [5]:
# Bring every review in both splits to a common length of `maxlen` token ids.
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)

In [6]:
# Chromosome layout (index -> gene): [num_layeroftrans, num_attentionhead, num_hiddenfeedforward, num_hiddenffn, name_actfunc, num_neuron, prob_drop, d_model, norm_existance]

def NeuralNetworkTransformer(chromo, epochs=5, batch_size=64):
    """Build, train and evaluate the transformer classifier a chromosome encodes.

    Chromosome genes read here (by index):
        0: number of transformer blocks
        1: attention heads, one entry per block
        3: ``1`` if a hidden Dense layer follows the pooling layer
        4: activation names  - one list per block, plus ``[act]`` for the head
        5: neuron counts     - one list per block, plus ``[units]`` for the head
        6: dropout rates     - one list per block, optionally ``[rate]`` for the head
        7: embedding size (``d_model``)
        8: ``[norm_after_attention, norm_after_feedforward]`` per block

    Uses the module-level ``maxlen``, ``vocab_size``, ``x_train``, ``y_train``,
    ``x_test`` and ``y_test``.

    Args:
        chromo: Chromosome laid out as described above.
        epochs: Number of training epochs.
        batch_size: Training batch size.

    Returns:
        The value of ``model.evaluate`` on the test split. With the metrics
        compiled here, callers read index 1 as the accuracy.
    """
    # Many models are built in one kernel session by the genetic search;
    # dropping the previous graph state keeps memory from growing per call.
    keras.backend.clear_session()

    n_layers = chromo[0]
    d_model = chromo[7]   # Embedding size for each token

    inputs = Input(shape=(maxlen,))
    x = TokenAndPositionEmbedding(maxlen, vocab_size, d_model)(inputs)

    # Block i is configured from entry i of every per-block gene. The previous
    # if/elif ladder reused index 1 for the first of two blocks and index 2 for
    # all three blocks, so earlier blocks were never built from their own genes.
    for layer_idx in range(n_layers):
        block = TransformerBlock(
            d_model,
            chromo[1][layer_idx],
            chromo[4][layer_idx],
            chromo[5][layer_idx],
            chromo[6][layer_idx],
            chromo[8][layer_idx],
        )
        x = block(x)

    x = GlobalAveragePooling1D()(x)   # dont edit this line

    if chromo[3]==1:
        # The head's genes are appended after the per-block entries, i.e. at
        # position n_layers, each wrapped in a one-element list.
        head_units = chromo[5][n_layers][0]
        head_activation = chromo[4][n_layers][0]
        x = Dense(head_units, activation=head_activation)(x)

        # Dropout needs a single float. Apply it only when the chromosome
        # carries a head entry in the dropout gene; otherwise no rate is
        # encoded and the layer is skipped.
        head_rates = chromo[6][n_layers] if len(chromo[6]) > n_layers else []
        if head_rates:
            x = Dropout(head_rates[0])(x)

    outputs = Dense(2, activation="softmax")(x)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    #train
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)

    #test
    results = model.evaluate(x_test, y_test, verbose=2)
    return results

In [7]:
# Search space of the genetic algorithm: one list of allowed values per gene.
num_layeroftrans = [1, 2, 3]            # number of transformer blocks
num_attentionhead = [1, 2, 4, 8]        # attention heads in a block
num_hiddenfeedforward = [0, 1, 2]       # hidden layers in a block's feed-forward stack
num_hiddenffn = [0, 1]                  # hidden Dense layer after pooling (0 = absent)
name_actfunc = ["relu", "sigmoid"]      # activation of a hidden layer
num_neuron = [5, 10, 20, 30]            # units of a hidden layer
prob_drop = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]  # dropout rate
d_model = [16, 32, 64, 128]             # embedding size
norm_existance = [0, 1]                 # layer normalisation flag (1 = present)

# Allowed values indexed by gene position in a chromosome.
Parameters = [
    num_layeroftrans,
    num_attentionhead,
    num_hiddenfeedforward,
    num_hiddenffn,
    name_actfunc,
    num_neuron,
    prob_drop,
    d_model,
    norm_existance,
]


In [8]:
class Individual:
    """A member of the population: a chromosome paired with its fitness."""

    def __init__(self, chromosome, Fitness):
        # Stored as given; the fitness is exposed under the lower-case name.
        self.fitness = Fitness
        self.chromosome = chromosome
  

In [9]:
import sys
import random
def create_gnome():
    """Sample a random chromosome describing one transformer architecture.

    Returns:
        A 9-element list:
            0: number of transformer blocks ``n``
            1: attention heads, ``n`` entries
            2: hidden feed-forward layer counts, ``n`` entries
            3: ``1`` if a hidden Dense layer follows pooling, else ``0``
            4: activation names, one list per block (+ ``[act]`` for the head)
            5: neuron counts, one list per block (+ ``[units]`` for the head)
            6: dropout rates, one list per block (+ ``[rate]`` for the head)
            7: embedding size
            8: ``[norm_after_attention, norm_after_feedforward]`` per block

        Genes 4, 5 and 6 are parallel: inner list ``i`` has as many entries as
        gene 2 says block ``i`` has hidden layers, and when gene 3 is ``1`` each
        of them ends with a one-element list for the head.
    """
    num_layeroftrans = [1, 2, 3]
    num_attentionhead = [1, 2, 4, 8]
    num_hiddenfeedforward = [0, 1, 2]
    num_hiddenffn = [0, 1]
    name_actfunc = ["relu", "sigmoid"]
    num_neuron = [5, 10, 20, 30]
    prob_drop = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    d_model = [16, 32, 64, 128]
    norm_existance = [0, 1]

    n_layeroftrans = random.choice(num_layeroftrans)
    n_attentionhead = [random.choice(num_attentionhead) for _ in range(n_layeroftrans)]
    n_hiddenfeedforward = [random.choice(num_hiddenfeedforward) for _ in range(n_layeroftrans)]
    n_hiddenffn = random.choice(num_hiddenffn)

    def sample_per_hidden_layer(options):
        # One inner list per block, one sampled value per hidden layer in it.
        return [[random.choice(options) for _ in range(count)] for count in n_hiddenfeedforward]

    n_neuron = sample_per_hidden_layer(num_neuron)
    actfunc = sample_per_hidden_layer(name_actfunc)
    p_drop = sample_per_hidden_layer(prob_drop)

    if n_hiddenffn == 1:
        # Genes for the Dense layer after pooling. The dropout gene previously
        # got no entry here although the model applies Dropout after that
        # layer, leaving it without a usable rate.
        actfunc.append([random.choice(name_actfunc)])
        n_neuron.append([random.choice(num_neuron)])
        p_drop.append([random.choice(prob_drop)])

    d_m = random.choice(d_model)
    norm_exist = [
        [random.choice(norm_existance) for _ in range(2)]
        for _ in range(n_layeroftrans)
    ]

    return [
        n_layeroftrans,
        n_attentionhead,
        n_hiddenfeedforward,
        n_hiddenffn,
        actfunc,
        n_neuron,
        p_drop,
        d_m,
        norm_exist,
    ]

def Eval_Fitness(chromo):
    """Return the mean test accuracy of ``chromo`` over repeated training runs.

    The network is trained and evaluated ``iterations`` times (module-level
    setting) through ``NeuralNetworkTransformer``; element 1 of each result is
    averaged.

    Args:
        chromo: Chromosome to evaluate.

    Returns:
        Average of element 1 of the per-run results.
    """
    # The original called an undefined name ``network``; the model builder in
    # this notebook is ``NeuralNetworkTransformer``.
    accuracies = [NeuralNetworkTransformer(chromo)[1] for _ in range(iterations)]
    return sum(accuracies) / iterations
    
def Binary_Tournament(population):
    """Select parents by repeated two-way tournaments.

    For every member of ``population`` two contestants are drawn at random
    (with replacement) and the one with the higher ``fitness`` is kept; on a
    tie the first draw wins.

    Args:
        population: Sequence of objects exposing a ``fitness`` attribute.

    Returns:
        A list of winners with the same length as ``population``.
    """
    winners = []
    for _ in population:
        first = random.choice(population)
        second = random.choice(population)
        winners.append(first if first.fitness >= second.fitness else second)
    return winners

def UniformCrossover(par1,par2,Pc):
    """Produce two child chromosomes by gene-wise uniform crossover.

    For each gene position a random number decides the direction: with
    probability ``Pc`` child 1 takes the gene from ``par1`` and child 2 from
    ``par2``; otherwise the genes are swapped.

    Args:
        par1: Parent exposing a ``chromosome`` list.
        par2: Parent exposing a ``chromosome`` list.
        Pc: Probability that a gene is inherited without swapping.

    Returns:
        ``[child1, child2]``, two chromosome lists. Genes are deep-copied so
        later in-place changes to a child never alter a parent.

    NOTE(review): genes are mixed independently, so a child can combine a
    layer count from one parent with per-layer lists from the other - confirm
    this is intended or group the structure-dependent genes.
    """
    child1=[]
    child2=[]
    # Iterate over the chromosomes; the original took len() of the parent
    # object itself, which has no length.
    for gene1, gene2 in zip(par1.chromosome, par2.chromosome):
        if random.random() < Pc:
            child1.append(copy.deepcopy(gene1))
            child2.append(copy.deepcopy(gene2))
        else:
            child1.append(copy.deepcopy(gene2))
            child2.append(copy.deepcopy(gene1))
    return [child1, child2]

def _mutable_slots(gene):
    """Return ``(container, index)`` for every non-list value nested in ``gene``."""
    slots = []
    for pos, item in enumerate(gene):
        if isinstance(item, list):
            slots.extend(_mutable_slots(item))
        else:
            slots.append((gene, pos))
    return slots


def mutation(chromo,Pm, parameters=None):
    """With probability ``Pm`` resample one value of ``chromo`` in place.

    A gene position is drawn at random. A scalar gene is replaced by a random
    allowed value; for a (nested) list gene a single value inside it is
    replaced, so the gene keeps its shape. The previous implementation
    overwrote list genes with a scalar and tested an undefined ``Pc``.

    Genes 0, 2 and 3 (block count, hidden-layer counts, head flag) are never
    mutated: the lengths of the other genes depend on them.
    NOTE(review): this means mutation never changes the architecture's
    structure - confirm that is acceptable.

    Args:
        chromo: Chromosome list; modified in place.
        Pm: Mutation probability.
        parameters: Allowed values per gene index. Defaults to the module-level
            ``Parameters``.

    Returns:
        The same ``chromo`` object.
    """
    if random.random() >= Pm:
        return chromo
    if parameters is None:
        parameters = Parameters

    structural = (0, 2, 3)
    candidates = [i for i in range(len(chromo)) if i not in structural]
    if not candidates:
        return chromo

    idex = random.choice(candidates)
    gene = chromo[idex]
    if isinstance(gene, list):
        slots = _mutable_slots(gene)
        # A block without hidden layers contributes no values; nothing to do.
        if slots:
            container, pos = random.choice(slots)
            container[pos] = random.choice(parameters[idex])
    else:
        chromo[idex] = random.choice(parameters[idex])
    return chromo

In [10]:
# Budget settings for the genetic search.
generations = 10  # upper bound on the number of evolution rounds
epochs = 5        # NOTE(review): not read by any code visible in this notebook
pop_size = 10     # number of individuals created for the initial population
iterations = 5    # training runs averaged into one fitness value


In [11]:
# Crossover / mutation probabilities used by the loop below.
# NOTE(review): neither name is defined anywhere in the visible notebook, so
# these values are placeholders - confirm the intended rates.
Pc = 0.5
Pm = 0.1

found = False
gen = 0

# Create the initial population; fitness is the mean of element 1 of the
# evaluation result over `iterations` independent training runs.
population = []
for _ in range(pop_size):
    gnome = create_gnome()
    accuracies = [NeuralNetworkTransformer(gnome)[1] for _ in range(iterations)]
    population.append(Individual(gnome, sum(accuracies) / iterations))

# Defined up front so the final report works even if no generation is run.
bestGene = max(population, key=lambda ind: ind.fitness)

while gen < generations:
    # Stop early once an individual reaches a fitness of 1.
    perfect = next((ind for ind in population if ind.fitness >= 1), None)
    if perfect is not None:
        found = True
        bestGene = perfect
        break

    # Binary_Tournament already returns one winner per population member, so
    # its result is the whole parent pool (it was previously called pop_size
    # times, producing a list of lists).
    ParentsPool = Binary_Tournament(population)
    random.shuffle(ParentsPool)

    # Pair parents from opposite ends of the shuffled pool.
    for i in range(pop_size // 2):
        par1 = ParentsPool[i]
        par2 = ParentsPool[len(ParentsPool) - i - 1]
        Children = UniformCrossover(par1, par2, Pc)
        for child in Children:
            child = mutation(child, Pm)
            population.append(Individual(child, Eval_Fitness(child)))

    # Keep the pop_size fittest of parents + children. The list is sorted in
    # ascending order, so the survivors are its tail; the previous slice kept
    # a lower part of the ranking and dropped the best individuals.
    population = sorted(population, key=lambda x: x.fitness)
    bestGene = population[-1]
    population = population[-pop_size:]
    random.shuffle(population)

    gen += 1

# Report the best individual, whether or not the early-stop target was hit.
print(bestGene.chromosome)
print(bestGene.fitness)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
782/782 - 69s - loss: 0.5668 - accuracy: 0.8448 - 69s/epoch - 89ms/step
Epoch 1/5
Epoch 2/5
Epoch 3/5

KeyboardInterrupt: 