In [1]:
import random
import numpy as np
import pandas as pd
from random import randint

In [26]:
df = pd.read_csv('ingredients-dataset.csv')
df.head(10)

Unnamed: 0,No,Ingredient,Unit (gr),Protein(gr),Fat(gr),Carbohydrate(gr),Protein(cal),Fat(cal),Carbohydrate(cal),Price (IDR),Total Calories
0,0,Milled rice,100,8.4,1.7,77.2,33.6,15.3,308.8,1180,357.7
1,1,Young corn,100,5.1,0.7,31.5,20.4,6.3,126.0,2300,152.7
2,2,Duck,200,32.0,57.2,0.0,128.0,514.8,0.0,10000,642.8
3,3,Black glutinous rice,100,8.0,2.3,74.5,32.0,20.7,298.0,1750,350.7
4,4,Long beans,100,2.3,0.1,5.3,9.2,0.9,21.2,2500,31.3
5,5,Yellow sweet potato,100,0.5,0.4,25.1,2.0,3.6,100.4,950,106.0
6,6,Fresh cucumber,55,0.2,0.2,1.4,0.8,1.8,5.6,1000,8.2
7,7,Rhinocerote,100,1.2,0.3,7.1,4.8,2.7,28.4,964,35.9
8,8,Fresh soybeans,100,30.2,15.6,30.1,120.8,140.4,120.4,2900,381.6
9,9,Fresh spinach,71,0.9,0.4,2.9,3.6,3.6,11.6,3100,18.8


In [27]:
DOMAIN = len(df) - 1
LEN_CHROMOSOME = 15

In [28]:
def create_individual(N=DOMAIN, len_chromosome=LEN_CHROMOSOME):
    return [randint(0,N) for _ in range(len_chromosome)]

In [29]:
def get_population(n_population):
  population = [create_individual() for _ in range(n_population)]
  return population

In [30]:
def get_needs(age, weight, height, f_activity=1.7):
  amb = float(66.5 + (13.7 * weight) + (5 * height) - (6.8 * age))
  tee = float(amb * f_activity)

  # spared some space for seasonings
  const = 0.95
  protein = float(0.25 * tee * const)
  fat = float(0.15 * tee * const)
  carb = float(0.60 * tee * const)

  return protein, fat, carb

In [46]:
def get_nutrition_and_price(individual):
  sum_price = sum_protein = sum_carb = sum_fat = 0

  for allele in individual:
    sum_protein += df.iloc[allele]['Protein(cal)']
    sum_fat += df.iloc[allele]['Fat(cal)']
    sum_carb += df.iloc[allele]['Carbohydrate(cal)']
    sum_price +=  df.iloc[allele]['Price (IDR)']

  return [sum_protein, sum_fat, sum_carb], sum_price

def get_fitness(individual, needs, alpha_protein=5, alpha_fat=5, alpha_carb=5):
  nutrition, price = get_nutrition_and_price(individual)
  protein, fat, carb = needs
  penalty_protein = float(abs(nutrition[0] - protein))
  penalty_fat = float(abs(nutrition[1] - fat))
  penalty_carb = float(abs(nutrition[2] - carb))

  sum_penalties = (alpha_protein*penalty_protein) + (alpha_fat*penalty_fat) + (alpha_carb*penalty_carb)

  fitness = 1/((price/1000) + sum_penalties)
  return fitness

In [47]:
def selection(population, fitness, n_tournament=3):
  selection_index = 0
  for n in range(n_tournament):
    r = randint(0, len(population)-1)
    if fitness[r] > fitness[selection_index]:
      selection_index = r
  return population[selection_index]



In [48]:
def crossover(parent1, parent2, pc):
  if random.uniform(0.0, 1.0) < pc:
    point1 = randint(1, len(parent1)-3)
    point2 = randint(point1, len(parent1)-2)

    children1 = parent2[:point1] + parent1[point1:point2] + parent2[point2:]
    children2 = parent1[:point1] + parent2[point1:point2] + parent1[point2:]
    return children1, children2
  else:
    return parent1, parent2

In [49]:
def mutation(individual, pm):
  for allele in range(len(individual)):
    if random.uniform(0.0, 1.0) < pm:
      creep = randint(-3, 3)
      individual[allele] = individual[allele] + creep
      # change with random numbers if the mutation was outside the domain
      if (individual[allele]<0 or individual[allele]>DOMAIN):
        individual[allele] = randint(0,DOMAIN)
  return individual

In [50]:
def continuous_update(prev_population, children, n_population):
  all_population = prev_population + children

  selected_idx = set()
  while len(selected_idx) < n_population:
    idx = randint(0, len(all_population)-1)
    selected_idx.add(idx)

  new_population = list()
  for idx in selected_idx:
    new_population.append(all_population[idx])

  assert len(new_population) == n_population
  return new_population

In [51]:
def genetic_algorithm(needs, n_iter, n_population, pc, pm):
  population = get_population(n_population)

  best_fits = list()

  best_individual, best_fit = population[0], get_fitness(population[0], needs)
  best_fits.append(best_fit)

  for generation in range(n_iter):
    fitness = [get_fitness(individual, needs) for individual in population]
    for individual in range(n_population):
      if fitness[individual] > best_fit:
        best_individual, best_fit = population[individual], fitness[individual]
        best_fits.append(best_fit)
    print(f'{generation+1}-th, best fit: {population[individual]}, fitness: {fitness[individual]}')

    mating_pool = n_population//2
    if (mating_pool%2 != 0):
      mating_pool += 1

    parents = [selection(population, fitness) for _ in range(mating_pool)]
    children = list()
    for i in range(0, mating_pool, 2):
      parent1, parent2 = parents[i], parents[i+1]
      for individual in crossover(parent1, parent2, pc):
        mutation(individual, pm)
        children.append(individual)
    population = continuous_update(population, children, n_population)

  best_fitness_mean = sum(best_fits)/len(best_fits)
  return best_individual, best_fit, best_fitness_mean

In [52]:
def exhaustive_search(needs):
  n_population_params = [10, 30, 50]
  n_iteration = 100
  pc_params = [0.8, 0.85, 0.9]
  pm_params = [0.0002, 0.067]

  best_n_population = n_population_params[0]
  best_pc = pc_params[0]
  best_pm = pm_params[0]

  best_individual, best_fit, best_fitness_mean = genetic_algorithm(needs, n_iteration, best_n_population, best_pc, best_pm)

  for n_pop_param in n_population_params:
      for pc_param in pc_params:
        for pm_param in pm_params:
            individual, fit, fitness_mean = genetic_algorithm(needs, n_iteration, n_pop_param, pc_param, pm_param)
            if fit > best_fit:
              best_individual = individual

              best_fit = fit
              best_fitness_mean = fitness_mean
              best_n_population = n_pop_param
              best_pc = pc_param
              best_pm = pm_param

  return best_individual, best_fit, best_fitness_mean, best_n_population, n_iteration, best_pc, best_pm


In [53]:
age = 23
weight = 70
height = 175
activity = "light"
ACTIVITY_MAP = {'total_rest': 1,
             'very_light': 1.3,
             'light': 1.6,
             'moderate': 1.7,
             'heavy': 2.1,
             'very_heavy': 2.4}



In [54]:
needs = get_needs(age, weight, height, ACTIVITY_MAP[activity])

In [55]:
best_individual, best_fit, best_fitness_mean, best_n_population, best_n_iter, best_pc, best_pm = exhaustive_search(needs)

1-th, best fit: [158, 50, 100, 153, 51, 8, 73, 188, 27, 83, 146, 60, 21, 163, 176], fitness: 0.00029913627392267564
2-th, best fit: [24, 185, 57, 36, 63, 156, 144, 39, 114, 127, 62, 27, 115, 184, 194], fitness: 0.0005980145915560337
3-th, best fit: [24, 185, 57, 36, 63, 156, 144, 39, 114, 127, 62, 27, 115, 184, 194], fitness: 0.0005980145915560337
4-th, best fit: [24, 185, 57, 36, 63, 156, 144, 39, 114, 127, 62, 27, 115, 184, 194], fitness: 0.0005980145915560337
5-th, best fit: [24, 185, 57, 36, 63, 156, 144, 39, 114, 127, 62, 27, 115, 184, 194], fitness: 0.0005980145915560337
6-th, best fit: [24, 185, 57, 36, 63, 156, 144, 39, 114, 127, 62, 27, 115, 184, 194], fitness: 0.0005980145915560337
7-th, best fit: [117, 40, 13, 77, 22, 74, 78, 84, 114, 127, 62, 27, 115, 169, 32], fitness: 0.0003384768271994565
8-th, best fit: [24, 185, 57, 36, 63, 156, 144, 39, 114, 127, 62, 27, 115, 184, 194], fitness: 0.0005980145915560337
9-th, best fit: [24, 185, 57, 36, 63, 156, 144, 39, 114, 127, 62, 27

In [56]:
print(f'got best parameter n_population: {best_n_population}, n_iteration: {best_n_iter}, best_pc: {best_pc}, best_pm: {best_pm}')

got best parameter n_population: 30, n_iteration: 100, best_pc: 0.8, best_pm: 0.067


In [57]:
print(f'best individual: {best_individual}, with fitness: {best_fit}')

best individual: [71, 53, 177, 154, 50, 151, 47, 32, 173, 72, 99, 169, 112, 152, 91], with fitness: 0.007082654578936272


In [58]:
ingredients = df.iloc[best_individual]
ingredients

Unnamed: 0,No,Ingredient,Unit (gr),Protein(gr),Fat(gr),Carbohydrate(gr),Protein(cal),Fat(cal),Carbohydrate(cal),Price (IDR),Total Calories
71,71,Throat,100,2.0,0.1,7.9,8.0,0.9,31.6,6000,40.5
53,53,Snapper fish,100,20.0,0.7,0.0,80.0,6.3,0.0,9500,86.3
177,177,Puffer fish,100,21.3,3.4,2.2,85.2,30.6,8.8,3800,124.6
154,154,Mangosteen,100,0.6,0.6,15.6,2.4,5.4,62.4,2290,70.2
50,50,Raw jali,100,11.0,4.0,61.0,44.0,36.0,244.0,2800,324.0
151,151,Cane root,100,2.4,0.3,5.5,9.6,2.7,22.0,5000,34.3
47,47,Andaliman,100,4.6,1.0,18.0,18.4,9.0,72.0,15800,99.4
32,32,Cheese,200,22.8,20.3,13.1,91.2,182.7,52.4,20100,326.3
173,173,Gedi leaves,100,2.9,0.7,10.0,11.6,6.3,40.0,4000,57.9
72,72,Comca nut,100,16.4,4.3,55.8,65.6,38.7,223.2,6900,327.5


In [66]:
protein, fat, carb = needs
print(f'protein: {protein}, fat: {fat}, carbohydrate:{carb}')

protein: 662.7579999999999, fat: 397.65479999999997, carbohydrate:1590.6191999999999


In [59]:
ingredients['Carbohydrate(cal)'].sum()

1633.6000000000001

In [62]:
ingredients['Fat(cal)'].sum()

383.4

In [64]:
ingredients['Protein(cal)'].sum()

652.0

In [65]:
ingredients['Price (IDR)'].sum()

106650