# Build Classification Models

In [13]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve
from sklearn.svm import SVC
import numpy as np


In [14]:
cuisines_df = pd.read_csv("../data/cleaned_cuisines.csv")
cuisines_df.shape

(3995, 382)

In [15]:
cuisines_label_df = cuisines_df['cuisine']

In [16]:
cuisines_feature_df = cuisines_df.drop(columns=['Unnamed: 0', 'cuisine'])
cuisines_feature_df.head()

Unnamed: 0,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,artemisia,artichoke,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [17]:
cuisines_feature_df.head()

Unnamed: 0,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,artemisia,artichoke,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [18]:
X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)

In [19]:
lr = LogisticRegression(multi_class='ovr',solver='liblinear')
model = lr.fit(X_train, np.ravel(y_train))

accuracy = model.score(X_test, y_test)
print ("Accuracy is {}".format(accuracy))

Accuracy is 0.8056713928273561


In [26]:
print(f'ingredients: {X_test.iloc[60][X_test.iloc[60]!=0].keys()}')
print(f'cuisine: {y_test.iloc[60]}')

ingredients: Index(['coconut', 'coriander', 'cumin', 'fenugreek', 'fish', 'lime', 'pepper',
       'turmeric'],
      dtype='object')
cuisine: thai


In [27]:
cuisines_df[(cuisines_df['coconut'] != 0) & (cuisines_df['coriander'] != 0) & (cuisines_df['cumin'] != 0) & (cuisines_df['fenugreek'] != 0) & (cuisines_df['fish'] != 0) & (cuisines_df['lime'] != 0) & (cuisines_df['pepper'] != 0) & (cuisines_df['turmeric'] != 0)]


Unnamed: 0.1,Unnamed: 0,cuisine,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
339,339,thai,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
404,404,thai,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
422,422,thai,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
424,424,thai,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2315,2315,thai,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2319,2319,thai,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2329,2329,thai,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3560,3560,thai,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3977,3977,thai,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
test= X_test.iloc[60].values.reshape(-1, 1).T
proba = model.predict_proba(test)
classes = model.classes_
resultdf = pd.DataFrame(data=proba, columns=classes)

topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])
topPrediction.head()



Unnamed: 0,0
thai,0.927576
indian,0.062535
japanese,0.009379
korean,0.000341
chinese,0.000169
