In [None]:
# -------------------------------------------------------- Imports --------------------------------------------------------
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve
from sklearn.svm import SVC
import numpy as np

In [None]:
# -------------------------------------------------------- Load Data --------------------------------------------------------
# Path of the cuisine dataset, resolved relative to the notebook's working directory.
CUISINES_CSV = "cleaned_cuisines.csv"

# Read the CSV into a DataFrame and preview its first rows as the cell output.
cuisines_df = pd.read_csv(CUISINES_CSV)
cuisines_df.head()

In [None]:
# -------------------------------------------------------- Shape Data --------------------------------------------------------
# Features: every column except the target and the 'Unnamed: 0' column
# (presumably a leftover index written out with the CSV -- confirm against the data file).
cuisines_feature_df = cuisines_df.drop(columns=['Unnamed: 0', 'cuisine'])

# Labels: the 'cuisine' column, kept as a Series.
cuisines_label_df = cuisines_df['cuisine']

# Preview the labels as the cell output.
cuisines_label_df.head()

In [None]:
# ------------------------------------------ Logistic Regression Multiclass Model ------------------------------------------

In [None]:
# ---- Split ----
# Hold out 30% of the rows for testing. A fixed random_state makes the split
# (and every metric computed from it) identical on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    cuisines_feature_df,
    cuisines_label_df,
    test_size=0.3,
    random_state=42,
)

In [None]:
# ---- Model & Train ----
# Flatten the training labels to a 1-D array before fitting.
train_labels = np.ravel(y_train)

# One-vs-rest logistic regression using the liblinear solver.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases --
# confirm against the installed version before upgrading.
lr = LogisticRegression(solver='liblinear', multi_class='ovr')

# fit() returns the estimator itself, so `model` and `lr` refer to the same object.
model = lr.fit(X_train, train_labels)

In [None]:
# ---- Evaluate ----
# Score the fitted model on the held-out split and report the result.
accuracy = model.score(X_test, y_test)
print(f"Accuracy is {accuracy}")

In [None]:
# ------ Simple Test ---------
# Inspect one held-out sample (positional row 50 of the test split):
# list the feature columns whose value is non-zero, then its true label.
sample_row = X_test.iloc[50]
sample_ingredients = sample_row[sample_row != 0].keys()
print(f'ingredients: {sample_ingredients}')
print(f'cuisine: {y_test.iloc[50]}')

In [None]:
# Select the sample as a one-row DataFrame (note the double brackets) instead of a
# reshaped ndarray: this keeps the column names the model was fitted with, so
# scikit-learn does not warn about missing feature names, and preserves column dtypes.
test = X_test.iloc[[50]]

# Class probabilities for the single sample; result has one row, one column per class.
proba = model.predict_proba(test)
classes = model.classes_

# Label each probability with its class, then transpose so classes become rows
# and sort them from most to least likely (column 0 holds the only sample).
resultdf = pd.DataFrame(data=proba, columns=classes)
topPrediction = resultdf.T.sort_values(by=[0], ascending=False)
topPrediction.head()

In [None]:
#-------------------------------------------------------- Predict --------------------------------------------------------
# Predict a label for every row of the held-out split; y_pred is consumed by the
# evaluation cell that follows.
y_pred = model.predict(X_test)

In [None]:
#-------------------------------------------------------- Evaluate --------------------------------------------------------
# Build the per-class text report comparing true and predicted labels, then print it.
report = classification_report(y_test, y_pred)
print(report)

In [None]:
#----------------------------------------------------- Other Classifiers -----------------------------------------------------

In [None]:
# ---- Imports ----
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve
import numpy as np

In [None]:
# ---- Split Data ----
# Fresh 70/30 split for the classifier comparison. A fixed random_state keeps the
# comparison reproducible: every run trains and scores on the same rows.
X_train, X_test, y_train, y_test = train_test_split(
    cuisines_feature_df,
    cuisines_label_df,
    test_size=0.3,
    random_state=42,
)

In [None]:
# ---- Models Setup ----
# Regularization parameter passed to the linear-kernel SVC.
C = 10

# Candidate classifiers keyed by the display name used when reporting results.
# The stochastic estimators are all seeded (random_state=0, matching the Linear SVC)
# so their scores are reproducible across runs.
classifiers = {
    'Linear SVC': SVC(kernel='linear', C=C, probability=True, random_state=0),
    'SVC': SVC(),
    'RFST': RandomForestClassifier(n_estimators=100, random_state=0),
    'ADA': AdaBoostClassifier(n_estimators=100, random_state=0)
}
n_classifiers = len(classifiers)

In [None]:

# ---- Train & Evaluate ----
# Fit each candidate classifier on the training split, then report its accuracy
# and per-class metrics on the held-out test split.
for name, classifier in classifiers.items():
    classifier.fit(X_train, np.ravel(y_train))

    # Predictions are made on X_test, so the figure printed below is a test
    # accuracy (the message previously labelled it as a training accuracy).
    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy (test) for %s: %0.1f%% " % (name, accuracy * 100))
    print(classification_report(y_test, y_pred))