In [19]:
max_fat = 75.0
min_fat = 68.0
max_carb = 300.0
min_carb = 290.0
max_protien = 202.0
min_protien = 192.0
max_cholesterol = 300.0


f = '/home/abemillan/food_data.tsv'

In [109]:
import csv
from collections import OrderedDict

food_tsv = csv.DictReader(open(f), delimiter='\t')

nutrients = [
    'total_fat',
    'sat_fat',
    'trans_fat',
    'polyunsaturated_fat',
    'monounsaturated_fat',
    'total_carbs',
    'dietary_fiber',
    'sugars',
    'protein',
    'vitamin_A',
    'vitamin_B6',
    'vitamin_C',
    'vitamin_D',
    'vitamin_K',
#     'thiamin',
#     'riboflavin',
#     'niacin',
#     'folate',
#     'pantothenic_acid',
    'sodium',
    'potassium',
    'calcium',
    'magnesium',
#     'phosphorus',
#     'manganese',
#     'zinc',
#     'copper',
#     'selenium',
    'iron',
    'cholesterol'
]
# vars_list = []
# nutrient_data = OrderedDict()
# incomplete_data = {}
def get_complete_nutrient_count(food_data):
    def count_complete(row):
        count = 0
        for r in row:
            if r:
                count += 1

        return count

    return { nutrient: float(count_complete(food_data[nutrient])) for nutrient in nutrients }

def get_clean_food_data(food_data):
    clean_data = OrderedDict()
    for key in food_data.keys():
        if key not in nutrients:
            clean_data[key] = food_data[key]
            
    for nutrient in nutrients:
        clean_data[nutrient] = []
        for i, data in enumerate(food_data[nutrient]):
            if not data:
                clean_data[nutrient].append(0.0)
            else:
                clean_data[nutrient].append(float(data) / float(clean_data['serving_size'][i]))
    return clean_data

def filter_food_data_by_completeness(food_data, complete_count, thres=0.6):
    total_foods = len(food_data['food'])
    for nutrient in nutrients:
        if (complete_count[nutrient] / total_foods) < thres:
            food_data.pop(nutrient)
            
            print "Dropped Nutrient: {}".format(nutrient)
    return food_data     

food_data_dict = OrderedDict()
for row in food_tsv:
    for c in food_tsv.fieldnames:
        if not food_data_dict.get(c):
            food_data_dict[c] = []
        
        food_data_dict[c].append(row[c])

complete_count = get_complete_nutrient_count(food_data_dict)
clean_food = get_clean_food_data(food_data_dict)
food_data = filter_food_data_by_completeness(clean_food, complete_count, thres=0.0)
food_names = [food_data_dict['food'][i].lower().replace(' ', '_') for i in xrange(len(food_data_dict['food']))]


In [118]:
food_names.index('chicken_breast')

0

In [164]:
from cplex import *
obj_fn = [float(x) for x in food_data['price_per_unit']] # [0.0 for _ in xrange(len(food_names))]
#obj_fn[13] = -1.0

obj_fn[food_names.index('salt')] *= 100.0
obj_fn[food_names.index('pepper')] *= 100.0
obj_fn[food_names.index('apple')] = obj_fn[food_names.index('orange')]
obj_fn[food_names.index('banana')] = obj_fn[food_names.index('orange')]
obj_fn[food_names.index('pinapple')] = obj_fn[food_names.index('orange')]
obj_fn[food_names.index('blueberries')] = obj_fn[food_names.index('orange')]


lb = [0.0 for _ in xrange(len(food_names))]
lb[food_names.index('pepper')] = 2
lb[food_names.index('salt')] = 2
# lb[food_names.index('apple')] = 3
# lb[food_names.index('banana')] = 3
# lb[food_names.index('pinapple')] = 3
# lb[food_names.index('blueberries')] = 3
# lb[food_names.index('orange')] = 3
lb[food_names.index('steal_cut_oats')] = 75
lb[food_names.index('sweet_potatoe')] = 50





ub = [infinity for _ in xrange(len(food_names))]
ub[food_names.index('spinash')] = 200

ctypes = "I" * len(food_names)


sense = ""
constraints = []
rhs = []
row_names = []

for nutrient in nutrients:
    if not food_data.get(nutrient):
        continue
        
    if nutrient == "trans_fat":
        continue
    
    if nutrient == "sugars":
        sense += "L"
        rhs.append(70.0)
        row_names.append("sugar")
        constraints.append(SparsePair(ind=food_names, val=food_data[nutrient]))
        
    elif nutrient == "total_fat":
        sense += "L"
        rhs.append(max_fat)
        row_names.append("max_fat")
        constraints.append(SparsePair(ind=food_names, val=food_data[nutrient]))

        
        sense += "G"
        rhs.append(min_fat)
        row_names.append("min_fat")
        constraints.append(SparsePair(ind=food_names, val=food_data[nutrient]))

    elif nutrient == "total_carbs":
        sense += "L"
        rhs.append(max_carb)
        row_names.append("max_carb")
        constraints.append(SparsePair(ind=food_names, val=food_data[nutrient]))

        
        sense += "G"
        rhs.append(min_carb)
        row_names.append("min_fat")
        constraints.append(SparsePair(ind=food_names, val=food_data[nutrient]))
        
    elif nutrient == "protein":
        sense += "L"
        rhs.append(max_protien)
        row_names.append("max_protien")
        constraints.append(SparsePair(ind=food_names, val=food_data[nutrient]))
        
        sense += "G"
        rhs.append(min_protien)
        row_names.append("min_protien")
        constraints.append(SparsePair(ind=food_names, val=food_data[nutrient]))
        
    elif nutrient == "dietary_fiber":
        fiber_ratio = [food_data['dietary_fiber'][i] - 0.2*food_data['total_carbs'][i] for i in xrange(len(food_data[nutrient]))]
        
        sense += "L"
        rhs.append(0.0)
        row_names.append(nutrient)
        constraints.append(SparsePair(ind=food_names, val=fiber_ratio))
        
    elif nutrient == "cholesterol":
        sense += "L"
        rhs.append(max_cholesterol)
        row_names.append(nutrient)
        constraints.append(SparsePair(ind=food_names, val=food_data[nutrient]))
        
    elif nutrient == "sat_fat":
        saturated_ratio = [food_data['sat_fat'][i] - 0.1*food_data['total_fat'][i] for i in xrange(len(food_data[nutrient]))]
        
        sense += "L"
        rhs.append(0.0)
        row_names.append(nutrient)
        constraints.append(SparsePair(ind=food_names, val=saturated_ratio))

        
    elif nutrient == "polyunsaturated_fat":
        polyunsaturated_ratio = [food_data['polyunsaturated_fat'][i] - 0.25*food_data['total_fat'][i] for i in xrange(len(food_data[nutrient]))]

        sense += "L"
        rhs.append(0.0)
        row_names.append(nutrient)
        constraints.append(SparsePair(ind=food_names, val=polyunsaturated_ratio))

    elif nutrient == "monounsaturated_fat":
        monounsaturated_ratio = [food_data['monounsaturated_fat'][i] - 0.6*food_data['total_fat'][i] for i in xrange(len(food_data[nutrient]))]

        sense += "L"
        rhs.append(0.0)
        row_names.append(nutrient)
        constraints.append(SparsePair(ind=food_names, val=monounsaturated_ratio))

    else:
        if nutrient == 'sodium':
            sense += "L"
        else:
            sense += "G"
            
        rhs.append(100.0)
        row_names.append(nutrient)
        constraints.append(SparsePair(ind=food_names, val=food_data[nutrient]))

In [165]:
def clean_solution(s):
    for i in xrange(len(s)):
        if s[i] == -0.0:
            s[i] = 0.0
    return s
            
try:
    my_prob = Cplex()
    #my_prob.parameters.mip.pool.absgap.set(0.0)
    #my_prob.parameters.mip.pool.intensity.set(4)
    #my_prob.parameters.mip.limits.populate.set(20000)
    
    
    my_prob.objective.set_sense(my_prob.objective.sense.minimize)

    my_prob.variables.add(obj=obj_fn, lb=lb, ub=ub, types=ctypes,
                       names=food_names)


    my_prob.linear_constraints.add(lin_expr=constraints, senses=sense,
                                rhs=rhs, names=row_names)


    #my_prob.populate_solution_pool()
    my_prob.solve()
except exceptions.CplexError as exc:
    print(exc)

else:
    x_i = my_prob.solution.get_values()
    for i, data in enumerate(x_i):
        if k in ["Chicken_Breast", "wild_sockeye_salmon"]:
            print("Food: {}\t{}oz".format(food_names[i], data))
        elif k in ["Avocado_Oil", "Chicken_Broth"]:
            print("Food: {}\t{}mL".format(food_names[i], data))

        else:
            print("Food: {}\t{}g".format(food_names[i], data))

#     numcols = my_prob.variables.get_num()
    
    
#     s = set()
    
#     for i in xrange(my_prob.solution.pool.get_num()):
#         x_i = tuple(clean_solution(my_prob.solution.pool.get_values(i)))
        
#         if x_i in s:
#             continue
            

#         s.add(x_i)

            

CPXPARAM_Read_DataCheck                          1
Tried aggregator 1 time.
MIP Presolve eliminated 4 rows and 1 columns.
Reduced MIP has 18 rows, 21 columns, and 230 nonzeros.
Reduced MIP has 0 binaries, 21 generals, 0 SOSs, and 0 indicators.
Presolve time = 0.00 sec. (0.08 ticks)
Tried aggregator 1 time.
Reduced MIP has 18 rows, 21 columns, and 230 nonzeros.
Reduced MIP has 0 binaries, 21 generals, 0 SOSs, and 0 indicators.
Presolve time = 0.00 sec. (0.10 ticks)
MIP emphasis: balance optimality and feasibility.
MIP search method: dynamic search.
Parallel mode: deterministic, using up to 8 threads.
Root relaxation solution time = 0.00 sec. (0.09 ticks)

        Nodes                                         Cuts/
   Node  Left     Objective  IInf  Best Integer    Best Bound    ItCnt     Gap

      0     0    infeasible                                         18         

Root node processing (before b&c):
  Real time             =    0.02 sec. (0.58 ticks)
Parallel b&c, 8 threads:
  Re

CPLEX Error  1217: No solution exists.


CplexSolverError: CPLEX Error  1217: No solution exists.

In [143]:
pool_stats = OrderedDict()

for sol in s:
    for i in xrange(len(food_names)):
        if not pool_stats.get(food_names[i]):
            pool_stats[food_names[i]] = {}
        
        if not pool_stats[food_names[i]].get('min'):
            pool_stats[food_names[i]]['min'] = sol[i]
            pool_stats[food_names[i]]['max'] = sol[i]
            continue

        if sol[i] < pool_stats[food_names[i]]['min']:
            pool_stats[food_names[i]]['min'] = sol[i]
            
        if sol[i] > pool_stats[food_names[i]]['max']:
            pool_stats[food_names[i]]['max'] = sol[i]

In [144]:
for k in pool_stats.keys():
    if k in ["chicken_breast", "wild_sockeye_salmon"]:
        print("Food: {}\t\t\tMin: {}oz\tMax: {}oz".format(k, pool_stats[k]['min'], pool_stats[k]['max']))
    elif k in ["avocado_oil", "chicken_broth"]:
        print("Food: {}\t\t\tMin: {}mL\tMax: {}mL".format(k, pool_stats[k]['min'], pool_stats[k]['max']))

    else:
        print("Food: {}\t\t\tMin: {}g\tMax: {}g".format(k, pool_stats[k]['min'], pool_stats[k]['max']))

In [135]:
for k in pool_stats.keys():
    if k in ["Chicken_Breast", "wild_sockeye_salmon"]:
        print("Food: {}\tMin: {}oz\tMax: {}oz".format(k, pool_stats[k]['min'], pool_stats[k]['max']))
    elif k in ["Avocado_Oil", "Chicken_Broth"]:
        print("Food: {}\tMin: {}mL\tMax: {}mL".format(k, pool_stats[k]['min'], pool_stats[k]['max']))

    else:
        print("Food: {}\tMin: {}g\tMax: {}g".format(k, pool_stats[k]['min'], pool_stats[k]['max']))

In [163]:
my_prob.solution.get_objective_value()

11.836038648825001