# Sommaire

* [Import des librairies](#Import-des-Librairies)
* [Dataset](#Dataset)
    * [Encodage du nutriscore](#Encodage-du-nutriscore)
* [Remplacement par la prédiction](#Remplacement-par-la-prédiction)
* [model de l'application](#model-de-l'application)
* [Création de l'application](#Création-de-l'application)


# Import des Librairies

In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import seaborn as sns
from sklearn.impute import MissingIndicator 
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

# Dataset 

In [2]:
df = pd.read_csv('df_nutriscore.csv' , delimiter = ",")

In [3]:
df = df.drop(['proteins_100g','carbohydrates_100g','sodium_100g'],axis=1)

In [4]:
df.shape

(647654, 7)

In [5]:
df_isnull = df.isnull()
df_row = df_isnull.any(axis=1)
df_nan = df[df_row]
df_nan.head()

Unnamed: 0,energy_100g,energy-kcal_100g,fat_100g,sugars_100g,saturated-fat_100g,salt_100g,nutriscore_grade
0,1569.0,375.0,7.0,15.0,3.08,1.4,
1,88.0,21.0,0.0,0.4,0.0,2.04,
2,251.0,60.0,3.0,3.0,1.0,1.15,
3,264.0,63.0,0.0,15.5,0.0,0.0,
5,540.0,129.0,4.9,16.3,3.1,0.25,


In [6]:
df_nan.shape

(328718, 7)

In [7]:
df_train = df.dropna()
df_train.head()

NumExpr defaulting to 8 threads.


Unnamed: 0,energy_100g,energy-kcal_100g,fat_100g,sugars_100g,saturated-fat_100g,salt_100g,nutriscore_grade
4,134.0,32.0,0.3,3.9,0.1,0.42,b
10,1594.0,381.0,22.0,21.9,15.5,0.1,d
15,657.0,157.0,0.0,27.0,0.0,0.0,a
18,598.0,143.0,12.7,1.0,1.0,0.27,b
30,824.0,197.0,13.0,0.9,4.0,1.1,c


## Encodage du nutriscore

In [8]:
def convert_grade(x):
    if x == 'a':
        return 1
    if x == 'b':
        return 2
    if x == 'c':
        return 3
    if x == 'd':
        return 4
    if x == 'e':
        return 5
df_train['nutriscore_grade'] = df_train['nutriscore_grade'].apply(convert_grade)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['nutriscore_grade'] = df_train['nutriscore_grade'].apply(convert_grade)


In [9]:
sns.pairplot(data=df_train, hue = 'nutriscore_grade')

In [94]:
df_train['nutriscore_grade'].value_counts()

4    88665
3    78978
1    55788
2    51052
5    44453
Name: nutriscore_grade, dtype: int64

# Remplacement par la prédiction

In [88]:
X_train = df_train.drop(['nutriscore_grade'], axis=1)
y_train =df_train['nutriscore_grade']

X_test = df_nan.drop(['nutriscore_grade'],axis=1)
X_test

Unnamed: 0,energy_100g,energy-kcal_100g,fat_100g,sugars_100g,saturated-fat_100g,salt_100g
0,1569.0,375.0,7.0,15.0,3.08,1.4


In [89]:
ss=StandardScaler()
X_test=ss.fit_transform(X_test)
X_train=ss.fit_transform(X_train)

In [90]:
model = KNeighborsClassifier(n_neighbors = 6)

model.fit(X_train,y_train)

KNeighborsClassifier(n_neighbors=6)

In [91]:
y_pred = model.predict(X_test)
score = model.score(X_train, y_train)
print(score)

0.8051646725361828


In [92]:
y_pred

array([3], dtype=int64)

In [59]:
y_pred = np.round(y_pred)
df_nan.loc[df_nan.nutriscore_grade.isnull(), 'nutriscore_grade'] = y_pred
df_nan.head()

Unnamed: 0,energy_100g,energy-kcal_100g,fat_100g,sugars_100g,saturated-fat_100g,salt_100g,nutriscore_grade
0,1569.0,375.0,7.0,15.0,3.08,1.4,4
1,88.0,21.0,0.0,0.4,0.0,2.04,3
2,251.0,60.0,3.0,3.0,1.0,1.15,3
3,264.0,63.0,0.0,15.5,0.0,0.0,1
5,540.0,129.0,4.9,16.3,3.1,0.25,3


In [17]:
df_complet = df_train.append(df_nan, ignore_index=True)
df_complet.head()

Unnamed: 0,energy_100g,energy-kcal_100g,fat_100g,sugars_100g,saturated-fat_100g,salt_100g,nutriscore_grade
0,134.0,32.0,0.3,3.9,0.1,0.42,2
1,1594.0,381.0,22.0,21.9,15.5,0.1,4
2,657.0,157.0,0.0,27.0,0.0,0.0,1
3,598.0,143.0,12.7,1.0,1.0,0.27,2
4,824.0,197.0,13.0,0.9,4.0,1.1,3


In [None]:
#sns.pairplot(data=df_complet, hue = 'nutriscore_grade')

# model de l'application

In [18]:
X = df_train.drop(['nutriscore_grade'], axis=1)
y= df_train['nutriscore_grade']

In [19]:
ss=StandardScaler()
X=ss.fit_transform(X)

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=10)

Modèle de RandomForest

In [23]:
for reg_param in (20,30,40,50,60):
    print(reg_param)
    model = RandomForestClassifier(n_estimators =reg_param)
    model.fit(X_train, y_train)
    accuracies = model.score(X_test, y_test)
    print(f"accurancy: {np.mean(accuracies) * 100:.2f}%")

20
accurancy: 75.77%
30
accurancy: 76.13%
40
accurancy: 76.43%
50
accurancy: 76.37%
60
accurancy: 76.47%


Modèle de Knn

In [22]:
for reg_param in (2,3,4,5,6,7,8):
    print(reg_param)
    model = KNeighborsClassifier(n_neighbors =reg_param)
    model.fit(X_train, y_train)
    accuracies = model.score(X_test, y_test)
    print(f"accurancy: {np.mean(accuracies) * 100:.2f}%")

2
accurancy: 69.20%
3
accurancy: 72.36%
4
accurancy: 72.42%
5
accurancy: 73.45%
6
accurancy: 73.30%
7
accurancy: 73.96%
8
accurancy: 73.77%


In [105]:
import pickle

model = RandomForestClassifier(n_estimators =60)
model.fit(X_train, y_train)
                               
with open("model.pkl", "wb") as file:
    pickle.dump(model, file)

# Création de l'application

In [109]:
df_app = pd.DataFrame(index=['0'], columns=['energy_100g','energy-kcal_100g',
                                            'fat_100g','sugars_100g','saturated-fat_100g','salt_100g'])

energie = input('entrer le nombre de energie/100g: ')
energie_kcal = input('entrer le nombre de energie_kcal/100g: ')
fat = input('entrer le nombre de matières grasses: ')
sat_fat = input('entrer le nombre de graisse saturé: ')
sugar = input('entrer le nombre de sucre: ')
salt = input('entrer le nombre de sel: ')


df_app.at['0', 'energy_100g'] = energie
df_app.at['0', 'energy-kcal_100g'] = energie_kcal
df_app.at['0', 'fat_100g'] = fat
df_app.at['0', 'sugars_100g'] = sugar
df_app.at['0', 'saturated-fat_100g'] = sat_fat
df_app.at['0', 'salt_100g'] = salt


X_train = df_train.drop(['nutriscore_grade'], axis=1)
y_train =df_train['nutriscore_grade']
X_test = df_app



model = pickle.load(open("model.pkl","rb"))


y_pred = model.predict(X_test)

def score(x):
    if x ==5:
        return 'E'
    elif x ==4:
        return 'D'
    elif x == 3:
        return 'C'
    elif x == 2:
        return 'B'
    else:
        return 'A'
        
print('le nutriscore de votre produit est: ',score(y_pred))

entrer le nombre de energie/100g: 630
entrer le nombre de energie_kcal/100g: 1
entrer le nombre de matières grasses: 2
entrer le nombre de graisse saturé: 0
entrer le nombre de sucre: 0
entrer le nombre de sel: 0
le nutriscore de votre produit est:  A
