In [1]:
from collections import OrderedDict

import pandas as pd
import numpy as np

from hyperopt import hp
from hyperopt import fmin, tpe, space_eval

## Get data

Goal: find amounts of `dark_ch`, `nibs` and `cocoa` whose combined protein, fat and carbs match those of 35 g of `target`.

In [2]:
def _per_gram(protein, fat, carbs):
    """Build a one-row nutrient table with every value divided by 100.

    Parameters
    ----------
    protein, fat, carbs : float
        Nutrient values as entered in this notebook (presumably grams per
        100 g of product, as on a nutrition label -- TODO confirm).

    Returns
    -------
    pd.DataFrame
        A single row with the columns ``protein``, ``fat`` and ``carbs``,
        in that order.
    """
    # Build the OrderedDict from pairs: passing a dict literal would take its
    # order from the plain dict, so the wrapper would guarantee nothing extra.
    return pd.DataFrame(OrderedDict([
        ('protein', [protein / 100]),
        ('fat', [fat / 100]),
        ('carbs', [carbs / 100]),
    ]))


# The product to imitate
target = _per_gram(protein=13.5, fat=49.4, carbs=13.6)

# The ingredients available for the mix
dark_ch = _per_gram(protein=9.8, fat=43, carbs=32)
nibs = _per_gram(protein=14, fat=54, carbs=11)
cocoa = _per_gram(protein=21, fat=11, carbs=10)

## Optimize using `hyperopt`

In [3]:
# Define an objective function
def objective(params, target_grams=35):
    """Squared-error loss between a candidate mix and the scaled target.

    Parameters
    ----------
    params : dict
        Amounts of each ingredient, stored under the keys ``'dark_ch'``,
        ``'nibs'`` and ``'cocoa'``.
    target_grams : float, optional
        Factor by which ``target`` is multiplied before the comparison.
        Defaults to 35, the value that used to be hard-coded here, so
        calling ``objective(params)`` behaves exactly as before.

    Returns
    -------
    float
        Sum over all nutrient columns of the squared difference between
        the mix and ``target_grams * target`` (a NumPy scalar).
    """
    # Unpack params
    amount_dark_ch = params['dark_ch']
    amount_nibs = params['nibs']
    amount_cocoa = params['cocoa']

    # Nutrient content of the candidate mix (one-row DataFrame)
    total = (
        amount_dark_ch * dark_ch
        + amount_nibs * nibs
        + amount_cocoa * cocoa
    )

    loss = (total - target_grams * target).values**2

    return np.sum(loss)

In [4]:
# Define a search space: each ingredient amount is sampled uniformly
# between 1 and 35
space = {
    name: hp.uniform(name, 1, 35)
    for name in ('dark_ch', 'nibs', 'cocoa')
}

# Minimize the objective over the space
# NOTE(review): no `rstate` is passed to `fmin`, so the result presumably
# varies from run to run -- confirm and consider seeding.
best = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=5000,
)

100%|█████████████████████████████████████████| 5000/5000 [04:01<00:00, 20.68trial/s, best loss: 0.0007349313201420041]


In [5]:
# Show the parameter values returned by `fmin`
best

{'cocoa': 1.6263269259884072,
 'dark_ch': 4.794805460652948,
 'nibs': 27.830607643708824}

In [6]:
# Show the scaled target, then the mix built from the amounts in `best`
print('35 g of target:', 35 * target, sep='\n')

print(
    '\nOur mix:',
    best['nibs'] * nibs + best['cocoa'] * cocoa + best['dark_ch'] * dark_ch,
    sep='\n',
)

35 g of target:
   protein    fat  carbs
0    4.725  17.29   4.76

Our mix:
    protein       fat     carbs
0  4.707705  17.26919  4.758337


## Linear system solve

In [7]:
# Assign variables
# A: one column per ingredient (nibs, cocoa, dark_ch), one row per nutrient
A = pd.concat(
    [nibs, cocoa, dark_ch],
    axis=0,
).T.values
# b: the target scaled by 35, as a column vector
b = 35 * target.values.T

In [8]:
# Check if the feature matrix has a non-zero determinant
# (a zero determinant would mean A is singular and has no inverse)
np.linalg.det(A)

-0.023340800000000002

In [9]:
# Sanity check
# Display A and b together to inspect their values and shapes
A, b

(array([[0.14 , 0.21 , 0.098],
        [0.54 , 0.11 , 0.43 ],
        [0.11 , 0.1  , 0.32 ]]),
 array([[ 4.725],
        [17.29 ],
        [ 4.76 ]]))

In [10]:
# Solve the system
# `np.linalg.solve` solves A @ x = b directly instead of forming the inverse
# of A and multiplying; `squeeze` drops the length-1 axis left by the
# column-shaped `b`.
solution = np.linalg.solve(A, b).squeeze()

In [11]:
# Get results
# `solution` is indexed in the column order of A: nibs, cocoa, dark_ch
print('35 g of target:', 35 * target, sep='\n')

print(
    '\nOur mix:',
    solution[0] * nibs + solution[1] * cocoa + solution[2] * dark_ch,
    sep='\n',
)

35 g of target:
   protein    fat  carbs
0    4.725  17.29   4.76

Our mix:
   protein    fat  carbs
0    4.725  17.29   4.76


This solution is somewhat **surprising**, as the data comes from real-world ingredients 🤯

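Despite this, the solution looks pretty **exact**! This is expected mathematically: with three unknowns and three equations whose matrix has a non-zero determinant, the system always has exactly one solution. The fortunate part is only that all three amounts come out positive.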

If you see an error here, please let me know! 

In [14]:
# Compare solutions
# Mind the ordering: `solution` follows the columns of A
# (nibs, cocoa, dark_ch), whereas `best` is a dict keyed by ingredient name.
solution, best

(array([27.88189779,  1.68950936,  4.76262596]),
 {'cocoa': 1.6263269259884072,
  'dark_ch': 4.794805460652948,
  'nibs': 27.830607643708824})