# Build Classification Models

In [1]:
import pandas as pd
cuisines_df = pd.read_csv("../data/cleaned_cuisines.csv")
cuisines_df.head()

Unnamed: 0.1,Unnamed: 0,cuisine,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,indian,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve
from sklearn.svm import SVC
import numpy as np

In [3]:
cuisines_label_df  = cuisines_df['cuisine']
cuisines_label_df.head()

0    indian
1    indian
2    indian
3    indian
4    indian
Name: cuisine, dtype: object

In [4]:
cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)
cuisines_feature_df.head()

Unnamed: 0,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,artemisia,artichoke,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [5]:
X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)

In [15]:
import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings("ignore", category=ConvergenceWarning)

test_solver = ['liblinear', 'lbfgs', 'newton-cg', 'sag', 'saga']
test_penalty = ['l1', 'l2', 'elasticnet', None]

for solver in test_solver:
    for penalty in test_penalty:
        try:
            params = {
                'solver': solver,
                'penalty': penalty,
                'multi_class': 'ovr'
            }

            if penalty == 'elasticnet':
                params['l1_ratio'] = 0.5

            lr = LogisticRegression(penalty =penalty, multi_class='ovr', solver=solver)
            model = lr.fit(X_train, np.ravel(y_train))

            accuracy = model.score(X_test, y_test)
            print (f"Accuracy at solver:{solver} and penalty: {penalty} is {accuracy}")

        except ValueError:
            pass



Accuracy at solver:liblinear and penalty: l1 is 0.7964970809007507
Accuracy at solver:liblinear and penalty: l2 is 0.8040033361134279
Accuracy at solver:lbfgs and penalty: l2 is 0.8040033361134279




Accuracy at solver:lbfgs and penalty: None is 0.7856547122602169




Accuracy at solver:newton-cg and penalty: l2 is 0.8040033361134279




Accuracy at solver:newton-cg and penalty: None is 0.7848206839032527




Accuracy at solver:sag and penalty: l2 is 0.8040033361134279




Accuracy at solver:sag and penalty: None is 0.7939949958298582




Accuracy at solver:saga and penalty: l1 is 0.7973311092577148




Accuracy at solver:saga and penalty: l2 is 0.8040033361134279




Accuracy at solver:saga and penalty: None is 0.7981651376146789


In [17]:
print(f'ingredients: {X_test.iloc[110][X_test.iloc[50]!=0].keys()}')
print(f'cuisine: {y_test.iloc[110]}')

ingredients: Index(['black_bean', 'cayenne', 'coriander', 'cumin', 'fenugreek', 'honey',
       'lamb', 'orange', 'pepper', 'sesame_oil', 'sesame_seed', 'sherry',
       'soy_sauce', 'soybean', 'turmeric', 'vinegar'],
      dtype='object')
cuisine: japanese


In [18]:
test= X_test.iloc[50].values.reshape(-1, 1).T
proba = model.predict_proba(test)
classes = model.classes_
resultdf = pd.DataFrame(data=proba, columns=classes)

topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])
topPrediction.head()



Unnamed: 0,0
chinese,0.929739
korean,0.035955
japanese,0.027159
indian,0.007074
thai,7.3e-05


In [19]:
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

     chinese       0.77      0.66      0.71       264
      indian       0.93      0.90      0.91       227
    japanese       0.73      0.76      0.75       236
      korean       0.79      0.82      0.81       224
        thai       0.78      0.87      0.82       248

    accuracy                           0.80      1199
   macro avg       0.80      0.80      0.80      1199
weighted avg       0.80      0.80      0.80      1199

