# Application of model to predict nutritional score

In [1]:
import pandas as pd
import numpy as np
import re
from projetpython.logit_model import get_predict_data
from projetpython.text_treatment import allergenes
import pickle

### First, we get the data

In [2]:
# Load the prediction dataset through the project helper; the preview below
# shows a `score` column, a `tags` column and one column per ingredient.
data = get_predict_data()
def str_to_list (string = ""):
    """Turn the textual form of a list of tags back into a list of strings.

    Square brackets, single quotes and all whitespace are stripped from
    ``string``; what remains is split on commas.

    Parameters
    ----------
    string : str
        Text such as ``"['gluten', 'soja']"``.

    Returns
    -------
    list of str
        The tags in their original order. An empty list (``"[]"`` or ``""``)
        gives ``[]`` rather than a list holding one empty string.
    """
    # Raw string: "\[", "\]" and "\s" are not valid escapes in a normal string
    # literal and trigger a warning on recent Python versions.
    cleaned = re.sub(r"[\[\]'\s]+", "", string)
    # Drop empty pieces so that an empty list does not produce [""].
    return [tag for tag in cleaned.split(",") if tag]
# Replace each textual tag list by a real Python list, then preview the frame.
data["tags"] = data["tags"].apply(str_to_list)
data.head()

Unnamed: 0,score,tags,lait,sel,sucre,ble,gluten,amidon,soja,oeuf,...,paprika,epices,naturels,poivron,jaune,drive,jambon,persil,maigre,pomme
2,3,"[gluten, soja, celeri, moutarde]",0,1,0,0,1,0,1,0,...,0,1,0,1,0,0,0,0,0,0
6,1,"[gluten, oeuf, poisson, lait]",1,1,0,1,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
7,2,"[gluten, oeuf, crustaces, poisson, moutarde]",0,1,1,1,0,1,0,1,...,1,0,0,1,1,0,0,0,0,0
8,3,"[oeuf, moutarde]",0,1,0,0,0,1,0,1,...,0,0,0,0,1,0,1,0,0,0
9,2,"[gluten, oeuf, crustaces, poisson, lait, mouta...",1,1,1,1,0,1,0,1,...,1,0,0,0,1,0,0,0,0,0


We need input from the user to know which allergens they must avoid.

In [3]:
def my_allergies(choices=None, read=input):
    """Interactively ask which allergens apply and return the selected ones.

    A numbered menu of the remaining allergens is printed on every round.
    Entering a number moves that allergen from the menu to the result;
    entering ``0`` ends the dialogue. Anything that is not an integer in
    ``1..len(remaining)`` (text, negative numbers, numbers past the end of
    the menu) is rejected with a message and the menu is shown again.

    Parameters
    ----------
    choices : iterable of str, optional
        Allergen names to offer. Defaults to the keys of ``allergenes``.
    read : callable, optional
        Zero-argument callable returning one line of user input. Defaults to
        the built-in ``input``; pass a stub to run the dialogue from a script.

    Returns
    -------
    list of str
        The selected allergens, in the order they were chosen.
    """
    if choices is None:
        choices = allergenes.keys()
    remaining = list(choices)
    answer = []
    print("Please, indicate what your allergies are.(use numbers!)")
    while True:
        for number, name in enumerate(remaining, start=1):
            print("{}. {}".format(number, name))
        print("{}. {}".format(0, "That's all"))
        try:
            res = int(read().strip())
            if res == 0:
                print("Thanks!")
                break
            # Explicit range check: a negative number would otherwise be
            # accepted by list.pop() and remove an item counted from the end.
            if not 1 <= res <= len(remaining):
                raise ValueError
            answer.append(remaining.pop(res - 1))
        except ValueError:
            print("Please, use valid the numbers!")
        print("Please, indicate what your other allergies are.(use numbers!)")
    return answer

# Run the dialogue and keep the selection as a set, which is what the later
# intersection test against each product's tags needs.
answer = set(my_allergies())

        

Please, indicate what your allergies are.(use numbers!)
1. epinards
2. amidon
3. gluten
4. oeuf
5. crustaces
6. poisson
7. arachide
8. soja
9. lait
10. fruits_coques
11. celeri
12. moutarde
13. sesame
14. lupin
15. mollusque
16. so2
0. That's all
3
Please, indicate what your other allergies are.(use numbers!)
1. epinards
2. amidon
3. oeuf
4. crustaces
5. poisson
6. arachide
7. soja
8. lait
9. fruits_coques
10. celeri
11. moutarde
12. sesame
13. lupin
14. mollusque
15. so2
0. That's all
3
Please, indicate what your other allergies are.(use numbers!)
1. epinards
2. amidon
3. crustaces
4. poisson
5. arachide
6. soja
7. lait
8. fruits_coques
9. celeri
10. moutarde
11. sesame
12. lupin
13. mollusque
14. so2
0. That's all
7
Please, indicate what your other allergies are.(use numbers!)
1. epinards
2. amidon
3. crustaces
4. poisson
5. arachide
6. soja
7. fruits_coques
8. celeri
9. moutarde
10. sesame
11. lupin
12. mollusque
13. so2
0. That's all
0
Thanks!


We look for the products that contain none of the selected allergens.

In [4]:
# Show the allergens that were selected above.
print(answer)


{'gluten', 'oeuf', 'lait'}


In [5]:
# True for a product whose tags share nothing with the selected allergens.
matches = data.tags.apply(lambda tags: answer.isdisjoint(tags))

In [6]:
# Number of products kept by the mask, then the size of the full table.
print(matches.sum())
print(data.shape)

1562
(5074, 52)


In [7]:
# Keep only the rows flagged True in `matches`. NOTE(review): this rebinds
# `data`, so the unfiltered frame is no longer reachable after this cell.
data = data[matches]

We average each ingredient column over the remaining products, which gives a single feature row.

In [8]:
# Column-wise mean of everything from the third column onwards, laid out as a
# single row (shape (1, n_columns)).
column_means = data.iloc[:, 2:].apply(np.mean, axis=0)
X_aller = np.array(column_means).reshape(1, -1)

We load the trained model and predict the score for the averaged feature row.

In [9]:
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load a model file that comes from a trusted source.
# The `with` block closes the file handle, which the bare open() call leaked.
with open("auchan_classification.sav", "rb") as model_file:
    logit = pickle.load(model_file)

# X_aller holds a single row, so exactly one prediction is expected. .item()
# extracts it as a scalar; calling float() directly on a 1-element array is
# deprecated in recent NumPy versions.
result = float(np.asarray(logit.predict(X_aller)).item())

### Result:

In [10]:
def predict(result , answer):
    """Print the nutritional grade that corresponds to a predicted class.

    Parameters
    ----------
    result : number
        Predicted class; it is truncated with ``int`` and must land on one of
        0..4, otherwise the lookup raises ``KeyError``.
    answer : iterable of str
        The allergens to mention in the message.

    Returns
    -------
    None
        The message is printed, nothing is returned.
    """
    grade_by_class = {"4" : "A", "3" : "B", "2" : "C", "1" : "D", "0" : "E"}
    allergy_text = ", ".join(answer)
    grade = grade_by_class[str(int(result))]
    template = "By being allergic to {} , your nutritional score is {}"
    print(template.format(allergy_text , grade))

# Print the verdict for the predicted class and the selected allergens.
predict(result , answer)

By being allergic to gluten, oeuf, lait , your nutritional score is A
