In [3]:
# -------------------------------------------------------- Imports --------------------------------------------------------
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve
from sklearn.svm import SVC
import numpy as np

In [None]:
# -------------------------------------------------------- Load Data --------------------------------------------------------
# Read the cuisines table from a CSV file resolved relative to the working directory.
cuisines_df = pd.read_csv(filepath_or_buffer="cleaned_cuisines.csv")
# Last expression of the cell: preview the first rows.
cuisines_df.head()

In [None]:
# -------------------------------------------------------- Shape Data --------------------------------------------------------
# Features: everything except the 'Unnamed: 0' column and the 'cuisine' label column.
cuisines_feature_df = cuisines_df.drop(columns=['Unnamed: 0', 'cuisine'])
# Labels: the 'cuisine' column on its own.
cuisines_label_df = cuisines_df['cuisine']
# Last expression of the cell: preview the first labels.
cuisines_label_df.head()

In [6]:
# ------------------------------------------ Logistic Regression Multiclass Model ------------------------------------------

In [7]:
# ---- Split ----
# Hold out 30% of the rows for testing. random_state pins the shuffle so the
# split, and every metric computed from it, is identical on each run
# (0 matches the seed already used for the Linear SVC later in this notebook).
X_train, X_test, y_train, y_test = train_test_split(
    cuisines_feature_df,
    cuisines_label_df,
    test_size=0.3,
    random_state=0,
)

In [8]:
# ---- Model & Train ----
# Logistic regression with the liblinear solver and the one-vs-rest multiclass setting.
# NOTE(review): newer scikit-learn releases may warn that `multi_class` is
# deprecated - confirm against the installed version.
lr = LogisticRegression(solver='liblinear', multi_class='ovr')
# Labels are flattened to a 1-D array before fitting. fit() hands back the
# estimator it was called on, so `model` and `lr` name the same fitted object.
lr.fit(X_train, np.ravel(y_train))
model = lr

In [9]:
# ---- Evaluate ----
# Score the fitted model on the held-out split and report it.
accuracy = model.score(X_test, y_test)
print(f"Accuracy is {accuracy}")

Accuracy is 0.8098415346121768


In [10]:
# ------ Simple Test ---------
# Look at one held-out sample (position 50): which feature columns are
# non-zero for it, and what its true label is.
sample_row = X_test.iloc[50]
nonzero_features = sample_row[sample_row != 0]
print(f'ingredients: {nonzero_features.keys()}')
print(f'cuisine: {y_test.iloc[50]}')

ingredients: Index(['coriander', 'cumin', 'fenugreek', 'pepper', 'soy_sauce', 'turmeric'], dtype='object')
cuisine: japanese


In [None]:
# Per-class probabilities for the held-out sample at position 50, best first.
# Selecting with a list keeps a one-row DataFrame, so the model receives the
# same column names it was fitted with rather than a bare NumPy array
# (which makes scikit-learn warn about missing feature names).
test = X_test.iloc[[50]]
proba = model.predict_proba(test)
classes = model.classes_
# One row of probabilities, one column per class label.
resultdf = pd.DataFrame(data=proba, columns=classes)
# Transpose so each class becomes a row (single column named 0), then sort descending.
topPrediction = resultdf.T.sort_values(by=0, ascending=False)
topPrediction.head()

In [12]:
#-------------------------------------------------------- Predict --------------------------------------------------------
# Predict a label for every row of the held-out split using the fitted
# logistic regression model; y_pred feeds the report in the next cell.
y_pred = model.predict(X_test)

In [None]:
#-------------------------------------------------------- Evaluate --------------------------------------------------------
# Build the text report comparing true and predicted labels, then print it.
report = classification_report(y_test, y_pred)
print(report)

In [14]:
#----------------------------------------------------- Other Classifiers -----------------------------------------------------

In [15]:
# ---- Imports ----
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve
import numpy as np

In [16]:
# ---- Split Data ----
# Fresh 70/30 split for the classifier comparison (rebinds X_train, X_test,
# y_train, y_test). random_state pins the shuffle so every classifier is
# compared on the same rows on every run; 0 matches the Linear SVC seed below.
X_train, X_test, y_train, y_test = train_test_split(
    cuisines_feature_df,
    cuisines_label_df,
    test_size=0.3,
    random_state=0,
)

In [17]:
# ---- Models Setup ----
# Regularisation parameter passed to the linear-kernel SVC.
C = 10
# Candidate classifiers keyed by the display name used in the report loop.
# The randomised estimators are all seeded with 0 (the seed the Linear SVC
# already used) so repeated runs produce the same models and scores.
classifiers = {
    'Linear SVC': SVC(kernel='linear', C=C, probability=True, random_state=0),
    # SVC with library defaults.
    'SVC': SVC(),
    'RFST': RandomForestClassifier(n_estimators=100, random_state=0),
    'ADA': AdaBoostClassifier(n_estimators=100, random_state=0)
}
n_classifiers = len(classifiers)

In [18]:

# ---- Train & Evaluate ----
# Fit each candidate on the training split, then report its accuracy and
# per-class metrics on the held-out test split.
for name, classifier in classifiers.items():
    classifier.fit(X_train, np.ravel(y_train))

    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    # The score is computed from X_test / y_test, so it is labelled as test
    # accuracy (the previous "(train)" label was misleading).
    print("Accuracy (test) for %s: %0.1f%% " % (name, accuracy * 100))
    print(classification_report(y_test, y_pred))

Accuracy (train) for Linear SVC: 77.3% 
              precision    recall  f1-score   support

     chinese       0.71      0.69      0.70       268
      indian       0.89      0.88      0.88       232
    japanese       0.75      0.75      0.75       248
      korean       0.85      0.73      0.79       241
        thai       0.70      0.84      0.76       210

    accuracy                           0.77      1199
   macro avg       0.78      0.78      0.78      1199
weighted avg       0.78      0.77      0.77      1199

Accuracy (train) for SVC: 81.9% 
              precision    recall  f1-score   support

     chinese       0.78      0.75      0.77       268
      indian       0.94      0.91      0.92       232
    japanese       0.80      0.77      0.79       248
      korean       0.90      0.78      0.83       241
        thai       0.72      0.90      0.80       210

    accuracy                           0.82      1199
   macro avg       0.83      0.82      0.82      1199
weig