# Clasificación Categórica de niveles de obesidad a partir de datos estadísticos utilizando redes neuronales artificiales y algoritmos genéticos. 

Se importan las librerías que se usarán, pandas nos permite manejar datos usando dataframes, numpy nos permite realizar operaciones matemáticas eficientemente con python, tensorflow y sklearn son librerías de machine learning.

In [2]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import sklearn
from time import time
from random import random

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from sklearn.model_selection import train_test_split
from keras.layers import Dense
from tensorflow.keras.utils import to_categorical 
from keras.optimizers import RMSprop
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import make_column_transformer


Preparación de datos, keras no permite variables con formato de string asi que es necesario convertir las variables categóricas en etiquetas numéricas por ejemplo en el campo "gender" male podria mapearse al número 0 y female al número 1.

In [3]:
df = pd.read_csv("https://raw.githubusercontent.com/ApplesOranges/ObesityPrediction/main/ObesityData.csv")
df['Gender'] = pd.Categorical(df['Gender'])
df['Gender'] = df['Gender'].cat.codes
df['Gender'] = pd.Categorical(df['Gender'])
df['MTRANS'] = pd.Categorical(df['MTRANS'])
df['MTRANS'] = df['MTRANS'].cat.codes
df['MTRANS'] = pd.Categorical(df['MTRANS'])
df['family_history_with_overweight'] = pd.Categorical(df['family_history_with_overweight'])
df['family_history_with_overweight'] = df['family_history_with_overweight'].cat.codes
df['family_history_with_overweight'] = pd.Categorical(df['family_history_with_overweight'])
df['FAVC'] = pd.Categorical(df['FAVC'])
df['FAVC'] = df['FAVC'].cat.codes
df['FAVC'] = pd.Categorical(df['FAVC'])
df['CAEC'] = pd.Categorical(df['CAEC'])
df['CAEC'] = df['CAEC'].cat.codes
df['CAEC'] = pd.Categorical(df['CAEC'])
df['SMOKE'] = pd.Categorical(df['SMOKE'])
df['SMOKE'] = df['SMOKE'].cat.codes
df['SMOKE'] = pd.Categorical(df['SMOKE'])
df['SCC'] = pd.Categorical(df['SCC'])
df['SCC'] = df['SCC'].cat.codes
df['SCC'] = pd.Categorical(df['SCC'])
df['CALC'] = pd.Categorical(df['CALC'])
df['CALC'] = df['CALC'].cat.codes
df['CALC'] = pd.Categorical(df['CALC'])
df['NObeyesdad'] = pd.Categorical(df['NObeyesdad'])
df['NObeyesdad'] = df['NObeyesdad'].cat.codes
df['NObeyesdad'] = pd.Categorical(df['NObeyesdad'])


Separamos la variable a predecir de las demás, posteriormente dividimos la base de datos en datos de entrenamiento y datos para testing (20% del total).

In [4]:
target_column = ['NObeyesdad'] 
predictors = list(set(list(df.columns))-set(target_column))
x = df[predictors].values
y = df[target_column].values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)
print(x_train.shape); print(x_test.shape)

(1688, 16)
(423, 16)


Se hizo una función que recibe los parámetros para poder usarla junto con el genético

In [5]:
def trainNeuralNetwork(nLayers, nNeurons, nEpochs, LearningRate, moment):#se reciben los parametros de ajuste y regresa la función fitnness
  model = Sequential()
  model.add(Dense(nNeurons, activation='relu', input_dim=16))
  for i in range(nLayers):
    model.add(Dense(nNeurons, activation='relu'))
  model.add(Dense(7, activation='softmax'))

  model.compile(optimizer=RMSprop(learning_rate=LearningRate/1000, momentum=moment/100),
                loss='sparse_categorical_crossentropy', 
                metrics=['accuracy'])

  model.fit(x_train, y_train, epochs=nEpochs, verbose=0)
  test_loss, test_acc = model.evaluate(x_train,  y_train, verbose=2)
  print("teminado, épocas: ", nEpochs)
  return test_acc

Funciones de utilidad como conversión de base.

In [6]:
#utils
def bin2Dec(binary): #conversión de decimal a binario
    binary=(int)(binary)
    binary1 = binary 
    decimal, i, n = 0, 0, 0
    while(binary != 0): 
        dec = binary % 10
        decimal = decimal + dec * pow(2, i) 
        binary = binary//10
        i += 1
    return decimal 

def rellenado(lon, binario):#rellena con ceros un número binario para alcanzar una longitud
    bn = binario
    while len(bn)<lon:
      bn="0"+bn
    return bn

def dec2Bin(dec):#conversión de binario a decimal
  return bin(dec).replace("0b", "")      

In [7]:
#hijo
class Child:
  def __init__(self, code):
    self.code = code
    self.val = self.evaluate()

  def evaluate(self):#divide la cadena de números binarios, los pasa a decimales y evalua los parametros
    saved=self.code.split("-")
    return trainNeuralNetwork(bin2Dec(saved[0]), bin2Dec(saved[1]), bin2Dec(saved[2]), bin2Dec(saved[3]), bin2Dec(saved[4]))

Funciones pertenecientes al algoritmo genético.

In [8]:
#genetico
import random
from random import random as rand

def hijoAlt():#crea un hijo aleatorio con los parametros especificados
  cad=""
  aux=random.randint(1, 5)
  cad=cad+rellenado(3,dec2Bin(aux))
  cad+="-"
  aux=random.randint(7, 23)
  cad=cad+rellenado(5,dec2Bin(aux))
  cad+="-"
  aux=random.randint(100, 500)
  cad=cad+rellenado(9,dec2Bin(aux))
  cad+="-"
  aux=random.randint(1, 3)
  cad=cad+rellenado(2,dec2Bin(aux))
  cad+="-"
  aux=random.randint(1, 3)
  cad=cad+rellenado(2,dec2Bin(aux))
  return Child(cad)

def genHijos(num:int):#genera un número determinado de hijos
  lsHij=[]
  for i in range(num):
    lsHij.append(hijoAlt())
  return lsHij

def sort(children):#ordena los individuos por su función fitness del mayor al menor
  for i in range(1, len(children)):
    for j in range(0, len(children)-1):
      if(children[j].val<children[j+1].val):
        aux=children[j]
        children[j]=children[j+1]
        children[j+1]=aux
  return children

def combine(codex, codey, inf, sup):#evalua los rangos de un parametro
  while(True):
    mix = ""
    randy=random.randint(0, len(codex))
    mix+=codex[:randy]+codey[randy:]
    if (bin2Dec(mix) >= inf and bin2Dec(mix) <= sup):
      break
  return mix

def crossover(stud, normie):#operador de cruce
  atNew=[]
  atStud=stud.code.split("-")
  atNormie=normie.code.split("-")
  atNew.append(combine(atStud[0], atNormie[0], 1, 5));
  atNew.append(combine(atStud[1], atNormie[1], 7, 23));
  atNew.append(combine(atStud[2], atNormie[2], 100, 500));
  atNew.append(combine(atStud[3], atNormie[3], 1, 3));
  atNew.append(combine(atStud[4], atNormie[4], 1, 3));
  newCode = atNew[0]+"-"+atNew[1]+"-"+atNew[2]+"-"+atNew[3]+"-"+atNew[4]
  if(random.randint(0,100)<13):
    mutate(newCode)
  return Child(newCode)

def mutate(code):#operador de mutación 
  while True:
    randy=random.randint(0, len(code)-1)
    s=list(code)
    if code[randy] == '0':
      s[randy] = '1'
      break
    elif code[randy] == '1':
      s[randy] = '0'
      break
  return "".join(s)

def nextGen(children):#genera una nueva generación a partir de una ya creada
  sorted=sort(children)
  studs = []
  population = []
  cont = 0
  sum = 0
  for i in range(len(sorted)):
    sum+=sorted[i].val
  
  while(len(studs)<2):
    i=0
    while(i<len(sorted)):
      if((sorted[i].val/sum)>=rand()):
        studs.append(sorted[i])
        sorted.pop(i)
      else:
        i+=1
  for i in range(len(studs)):
    for j in range(len(sorted)):
      population.append(crossover(studs[i], sorted[j]))
  sorted=sorted+studs
  sorted=sorted+population
  sorted=sort(sorted)
  print(sorted)
  return sorted


In [9]:
from random import random as rand
rand()

8.998758149569852e-05

In [None]:
children=genHijos(8)
gens=0
inicio = int(time() * 1000)
children=sort(children)
while(children[0].val<0.98 or gens>5):
  children = nextGen(children)[:8]
  sum=0
  for c in children:
    print(c.code, c.val)
    sum+=c.val
  print("promedio: ", sum/len(children))
  gens+=1
fin = int(time() * 1000)
print("tiempo: ", fin-inicio/1000)   
print("mejor:", children[0].code, children[0].val)
print("numero de generaciones: ", gens)

53/53 - 0s - loss: 0.4218 - accuracy: 0.8288
teminado, épocas:  146
53/53 - 0s - loss: 0.2330 - accuracy: 0.9028
teminado, épocas:  431
