<a href="https://colab.research.google.com/github/Michael-Jimenez-C/Redes-neuronales-basadas-en-ADN/blob/main/DNA_based_NN_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## DNA-NN Architecture

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TerminateOnNaN
from tensorflow import keras
import numpy as np

In [None]:

import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the iris table through seaborn.
data = sns.load_dataset('iris')

# Model inputs: the four flower measurements, in a fixed column order.
X = data.loc[:, ['sepal_length', 'sepal_width', 'petal_length','petal_width']]

# Model targets: one indicator column per species value.
Y = pd.get_dummies(data['species'])

# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=42,
    test_size=0.33,
)

In [None]:
# Two-way lookup between nucleotide letters and base-4 digit characters.
# The first half maps letter -> digit, the second half digit -> letter, so the
# same table serves both the encoder and the decoder.
encoding = {
    **{base: str(digit) for digit, base in enumerate("ATCG")},
    **{str(digit): base for digit, base in enumerate("ATCG")},
}

# Base pairing used by the mutation operator: A <-> T and G <-> C.
complement = dict(zip("ATGC", "TACG"))


# Sign carried by the leading nucleotide of a signed number:
# A and G decode as positive, T and C as negative.
SIGN = {base: (1 if base in "AG" else -1) for base in "ATGC"}

# Activation-function table, usable in both directions:
#   index (0-4) -> Keras activation name, and activation name -> index.
# Indices 1 and 3 both decode to "relu". The reverse mapping can hold only one
# index per name; "relu" encodes as 3, which is the value the previous literal
# effectively produced (its earlier `"relu":1` entry was silently overwritten
# by the later duplicate key and has been removed).
FACT = {
    0:"linear",
    1:"relu",
    2:"sigmoid",
    3:"relu",
    4:"tanh",
    "linear":0,
    "relu":3,
    "sigmoid":2,
    "tanh":4
}

# Number of base-4 digits used for each half (integer / fractional) of an
# encoded number.
TOKEN_WIDTH = 5

# Genetic-operator probabilities.
# NOTE(review): pm is presumably the mutation probability, but nothing in the
# visible code reads it -- confirm whether it should gate the mutation step.
pm = 0.5
# Chance that a given ordered pair of solutions is crossed over.
pc = 0.3

# Width of the first Dense layer and of the final softmax layer.
n_in = 4
n_out = 3


# Keyword arguments forwarded verbatim to `model.compile(**config)`.
config = dict(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
def DNA_encoder(f):
  """Decorator: turn a "number -> digit string" function into a DNA encoder.

  The wrapped function `f` receives the absolute value of the number and must
  return a string of digit characters (optionally containing '.').  Every
  character except '.' is translated through the `encoding` table; '.' is
  kept as is.

  The returned callable takes:
    num:    the number to encode.
    signed: when True (default) the result is prefixed with 'A' for
            non-negative numbers or 'T' for negative ones; when False no
            prefix is added.
  """
  def _f(num: float, signed = True):
    prefix = ('T' if num < 0 else 'A') if signed else ""
    digits = f(abs(num))
    return prefix + "".join('.' if ch == '.' else encoding[ch] for ch in digits)
  return _f

def DNA_decoder(f):
  """Decorator: turn a "digit string -> number" function into a DNA decoder.

  The returned callable takes:
    seq:    the nucleotide string to decode.
    signed: when True (default) the first character is consumed as the sign
            (looked up in `SIGN`) and the rest is decoded; when False the
            whole string is decoded and the sign factor is 1.

  Every character except '.' is translated through the `encoding` table
  before being handed to `f`; the result of `f` is multiplied by the sign.
  """
  def _f(seq: str, signed = True):
    if signed:
      factor, seq = SIGN[seq[0]], seq[1:]
    else:
      factor = 1
    digits = "".join('.' if ch == '.' else encoding[ch] for ch in seq)
    return factor * f(digits)
  return _f

In [None]:
@DNA_encoder
def float_to_base4(num :float):
    """Render a non-negative number as a fixed-width base-4 digit string.

    The result has the form "<integer digits>.<fractional digits>".  The
    integer part is left-padded with '0' to at least TOKEN_WIDTH digits (it is
    not truncated if it needs more); the fractional part is truncated to
    TOKEN_WIDTH digits and right-padded with '0'.

    Because of the DNA_encoder decorator, callers pass the number (and an
    optional `signed` flag) and receive the digits already translated to
    nucleotide letters.
    """
    whole = int(num)
    remainder = num - whole

    # Peel off integer digits, least significant first.
    int_digits = []
    while whole > 0:
        whole, digit = divmod(whole, 4)
        int_digits.append(str(digit))
    int_text = "".join(reversed(int_digits)) or "0"

    # Repeatedly shift the fraction left by one base-4 place and take the
    # digit that crosses the point, up to TOKEN_WIDTH digits.
    frac_digits = []
    while remainder > 0 and len(frac_digits) < TOKEN_WIDTH:
        remainder *= 4
        digit = int(remainder)
        frac_digits.append(str(digit))
        remainder -= digit
    frac_text = "".join(frac_digits)

    return int_text.rjust(TOKEN_WIDTH, '0') + "." + frac_text.ljust(TOKEN_WIDTH, '0')

@DNA_decoder
def base4_to_float(base4):
    """Convert a base-4 digit string (optionally with a '.') to a number.

    Digits before the '.' are weighted by non-negative powers of 4, digits
    after it by negative powers of 4.  Without a '.', the whole string is the
    integer part and the result is an int.

    Because of the DNA_decoder decorator, callers pass a nucleotide string
    (and an optional `signed` flag); the letters are translated to digits
    before reaching this function.
    """
    whole_digits, frac_digits = base4.split(".") if "." in base4 else (base4, "")

    # Integer part: the rightmost digit has weight 4**0.
    whole_value = sum(
        int(digit) * (4 ** power)
        for power, digit in enumerate(reversed(whole_digits))
    )

    # Fractional part: the first digit after the point has weight 4**-1.
    frac_value = 0
    for place, digit in enumerate(frac_digits, start=1):
        frac_value += int(digit) * (4 ** -place)

    return whole_value + frac_value

In [None]:
# Length, in nucleotides, of one encoded token and of one frame (two tokens).
TOKENLENGHT = TOKEN_WIDTH
FRAMELENGHT = TOKENLENGHT * 2

class Frame:
  """One layer gene: a unit-count token followed by an activation token.

  Both tokens are the integer part of `float_to_base4(..., signed=False)`,
  i.e. nucleotide strings padded to TOKENLENGHT characters, so a frame is
  FRAMELENGHT characters long as long as each value fits in TOKENLENGHT
  base-4 digits.
  """

  def __init__(self, nn: int,function: str):
    """Encode a layer.

    Args:
      nn: number of units in the layer.
      function: activation name; must be a key of FACT.
    """
    # Both attributes hold the already-encoded nucleotide strings.
    self.nn: str = float_to_base4(nn,signed=False).split('.')[0]
    self.function: str = float_to_base4(FACT[function] % 5,signed=False).split('.')[0]

  def get(self)->str:
    """Return the frame text: unit-count token followed by activation token."""
    return self.nn+self.function

  @staticmethod
  def from_frame(frame: str)->tuple:
    """Decode a frame string into `(units, activation_index)`.

    The activation index is reduced modulo 5 so that any token value maps to
    a valid integer key of FACT.

    Raises:
      AssertionError: if `frame` is not exactly FRAMELENGHT characters long.
    """
    assert len(frame) == FRAMELENGHT, "Expected frame lenght%i, given %i"%(FRAMELENGHT, len(frame))
    nn = base4_to_float(frame[:TOKENLENGHT],signed=False)
    # The activation token starts right after the unit-count token.  The
    # previous slice began at TOKENLENGHT+1 and dropped the token's first
    # digit, so mutated genomes were decoded incorrectly.
    function = base4_to_float(frame[TOKENLENGHT:],signed=False) % 5
    return nn,function

  def __repr__(self):
    return self.get()


class DNANN:
  """A network genome: one Frame per hidden layer, concatenated into a string."""

  def __init__(self, topology, functions):
    """Build the frames for a network.

    Args:
      topology: unit count of each layer, in order.
      functions: activation name of each layer (keys of FACT), same length
        as `topology`.

    Raises:
      AssertionError: if the two sequences differ in length.
    """
    assert len(topology)==len(functions), "Activation function and topology must have same dimensions %i %i"%(len(topology),len(functions))
    self.frames = [Frame(T,F) for T,F in zip(topology, functions)]

  @staticmethod
  def from_frames(frames: str) -> list:
    """Split a genome string into its consecutive FRAMELENGHT-sized frames.

    Raises:
      AssertionError: if the length of `frames` is not a multiple of
        FRAMELENGHT.
    """
    assert len(frames) % FRAMELENGHT == 0, "FRAMES must be divisible by %i"%FRAMELENGHT
    # Step by a whole frame.  The previous code advanced one character at a
    # time and therefore returned overlapping windows, not the real frames.
    return [frames[i:i+FRAMELENGHT] for i in range(0, len(frames), FRAMELENGHT)]

  def get(self):
    """Return the genome: every frame's text concatenated in layer order."""
    return "".join([i.get() for i in self.frames])

  def __repr__(self):
    return self.get()

In [None]:
# Initial topology
# Seed population: two hand-written five-layer networks, stored as genome strings.

redes = []

# First seed: relu on every layer.
topologia = [10, 50, 50, 50, 10]
f_activation = ['relu'] * 5

redes += [DNANN(topologia, f_activation).get()]

# Second seed: mixed activations and a narrower fourth layer.
topologia = [10, 50, 50, 20, 10]
f_activation = ['relu', 'sigmoid', 'relu', 'tanh', 'sigmoid']

redes += [DNANN(topologia, f_activation).get()]

In [None]:

def marca(msg):
  """Decorator factory that logs the start and end of each call.

  Prints "inicio <msg>" before the wrapped function runs and "final <msg>"
  after it returns (nothing is printed at the end if it raises).

  Args:
    msg: label included in both log lines.

  Returns:
    A decorator; the decorated function keeps its name/docstring, accepts
    any arguments and returns whatever the original returns.
  """
  # Local import so the helper works without touching the notebook's import cell.
  from functools import wraps

  def _decorator(f):
    @wraps(f)
    def _function_wrapper(*args, **kwargs):
      print("inicio",msg)
      resultado = f(*args, **kwargs)
      # This print used to sit after a `return` and was never reached.
      print("final",msg)
      return resultado
    return _function_wrapper
  return _decorator

In [None]:
import random

def cruzar(sol1,sol2):
  """Single-point crossover of two genomes.

  A cut position is drawn uniformly from 1 .. len(sol1)-1 (NumPy's global
  generator); each child takes the head of one parent and the tail of the
  other.

  Returns:
    A two-element list: [head(sol1)+tail(sol2), head(sol2)+tail(sol1)].
  """
  corte = np.random.randint(1, len(sol1))
  cabeza1, cola1 = sol1[:corte], sol1[corte:]
  cabeza2, cola2 = sol2[:corte], sol2[corte:]
  return ["".join(cabeza1 + cola2), "".join(cabeza2 + cola1)]


@marca("cruzar")
def cruzarSols(soluciones):
  """Extend a population with crossover children.

  Every ordered pair of solutions that differ in value is considered; with
  probability `pc` the pair is crossed and both children are appended.  The
  input list is left untouched.

  Returns:
    A new list: a copy of `soluciones` followed by the children produced.
  """
  poblacion = soluciones.copy()
  for primero in soluciones:
    for segundo in soluciones:
      # Identical genomes are skipped without drawing a random number.
      if segundo == primero:
        continue
      if random.random() < pc:
        poblacion += cruzar(primero, segundo)
  return poblacion

In [None]:
def mutar(sol):
  """Return a mutated copy of a genome.

  A random number of distinct positions (between 1 and len(sol)) is chosen
  and the nucleotide at each chosen position is replaced by its entry in
  `complement`; all other positions are copied unchanged.
  """
  cuantas = random.randint(1, len(sol))
  elegidas = set(random.sample(range(len(sol)), cuantas))
  return "".join(
      complement[base] if pos in elegidas else base
      for pos, base in enumerate(sol)
  )

@marca("mutar")
def mutarSols(soluciones):
  """Extend a population with one mutant per existing solution.

  Returns:
    A new list: a copy of `soluciones` followed by `mutar(s)` for each
    solution `s`, in the same order.  The input list is left untouched.
  """
  poblacion = soluciones.copy()
  poblacion.extend(mutar(original) for original in soluciones)
  return poblacion

In [None]:
def _entrenar_y_medir(solucion):
  """Build and train the network encoded by one genome; return its fitness.

  The fitness is the lowest finite validation loss seen during training, or
  +inf when the genome cannot produce a usable score.
  """
  genes = [Frame.from_frame(w) for w in DNANN.from_frames(solucion)]

  # Crossover and mutation can decode to a layer with no units.  Depending on
  # the Keras version that either raises or yields a degenerate layer, so such
  # genomes get the worst possible score instead of being trained.
  if any(nn < 1 for nn, _ in genes):
    return float('inf')

  # Drop the graph/state left behind by previously evaluated candidates so
  # memory does not grow with every model built in the search.
  keras.backend.clear_session()

  layers = [Dense(n_in, activation='linear')]
  for nn, act in genes:
    layers.append(Dense(nn, activation=FACT[act]))
  layers.append(Dense(n_out, activation='softmax'))

  red = Sequential(layers)
  red.compile(**config)

  hist = red.fit(
      X_train,
      y_train,
      verbose= False,
      batch_size=96,
      epochs=2000,
      validation_split=.3,
      callbacks=[
      EarlyStopping(monitor="val_loss",restore_best_weights=True),
      TerminateOnNaN()
  ] )

  # EarlyStopping restores the best weights, so score the model by its best
  # epoch rather than by the last (already worse) one.  Non-finite or missing
  # values are ignored so the caller can always sort the scores.
  perdidas = [v for v in hist.history.get('val_loss', []) if np.isfinite(v)]
  return min(perdidas) if perdidas else float('inf')


def evaluar(soluciones):
  """Score every genome in a population.

  Args:
    soluciones: iterable of genome strings.

  Returns:
    A list of `[genome, fitness]` pairs in input order, where fitness is the
    best validation loss reached (lower is better, +inf for unusable genomes).
  """
  return [[solucion, _entrenar_y_medir(solucion)] for solucion in soluciones]

In [None]:
# Evolution loop: four generations of crossover -> mutation -> evaluation,
# keeping the five genomes with the lowest validation loss each time.
FE = []  # best validation loss of each generation
for i in range(4):
  print('Entrenando',i)
  redes = mutarSols(cruzarSols(redes))
  temp = sorted(evaluar(redes), key=lambda par: par[1])[:5]
  redes = [par[0] for par in temp]
  FE.append(min(par[1] for par in temp))


In [None]:
import matplotlib.pyplot as plt
# Validation loss recorded for each generation: points plus a gray connecting line.
curva = {'generación': list(range(len(FE))), 'val_loss': FE}
sns.scatterplot(x='generación', y='val_loss', data=curva)
sns.lineplot(x='generación', y='val_loss', color='gray', data=curva)
# One tick per recorded generation, so the axis follows the length of FE
# instead of assuming exactly four generations.
plt.xticks(curva['generación'])
plt.grid()
plt.savefig('Grafica error por generacion.pdf')

In [None]:
# Decode the best genome of the last generation into (units, activation name)
# pairs and show them next to its [genome, loss] record.
tmp = list(map(Frame.from_frame, DNANN.from_frames(temp[0][0])))
[(nn, FACT[act]) for nn, act in tmp], temp[0]

In [None]:
def DNANET(solucion):
  """Build, compile and train the network described by a genome string.

  The model is a Sequential stack: a linear Dense layer of `n_in` units, one
  Dense layer per decoded frame, and a softmax Dense layer of `n_out` units.
  It is compiled with `config` and fitted on `X_train` / `y_train` with early
  stopping on the validation loss.

  Returns:
    The trained Keras model.
  """
  capas = [Dense(n_in, activation='linear')]
  capas.extend(
      Dense(nn, FACT[act])
      for nn, act in map(Frame.from_frame, DNANN.from_frames(solucion))
  )
  capas.append(Dense(n_out, activation='softmax'))

  red = Sequential(capas)
  red.compile(**config)

  paradas = [
      EarlyStopping(monitor="val_loss",restore_best_weights=True),
      TerminateOnNaN(),
  ]
  red.fit(
      X_train,
      y_train,
      batch_size=96,
      epochs=2000,
      validation_split=.3,
      callbacks=paradas,
  )
  return red

# Retrain the best genome of the last generation to obtain the final model.
red = DNANET(solucion=temp[0][0])

In [None]:
# Predicted class index minus true class index for every test row;
# a zero marks a correct prediction.
red.predict(X_test).argmax(axis=1) - np.argmax(y_test, axis=1)