# Build Classification Models

We will use the dataset saved in the previous lesson, which contains balanced, clean data about cuisines.

We will use this dataset with a variety of classifiers to predict a national cuisine from a group of ingredients.

In [None]:
# Import pandas and load the cleaned cuisines dataset from the current working directory
import pandas as pd
cuisines_df = pd.read_csv(filepath_or_buffer="./cleaned_cuisines.csv")
# Preview the first five rows
cuisines_df.head(n=5)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve
from sklearn.svm import SVC
import numpy as np

In [None]:
# Separate the target from the features: the 'cuisine' column holds the labels to predict

cuisines_label_df = cuisines_df.loc[:, 'cuisine']
# Preview the first five labels
cuisines_label_df.head(n=5)

In [None]:
# Everything except the 'Unnamed: 0' column and the 'cuisine' label column is kept as trainable features.
# drop() returns a new DataFrame, so cuisines_df itself is left untouched.

cuisines_feature_df = cuisines_df.drop(columns=['Unnamed: 0', 'cuisine'])
# Preview the first five feature rows
cuisines_feature_df.head(n=5)

# Split the data

In [None]:
# Split the data into training (70%) and testing (30%) groups.
# random_state pins the shuffle so the split, and every score computed from it,
# is identical each time the notebook is restarted and run from the top.

X_train, X_test, y_train, y_test = train_test_split(
    cuisines_feature_df, cuisines_label_df, test_size=0.3, random_state=42
)

# Apply logistic regression

Since this is a multiclass problem, we need to choose which multiclass scheme to use and which solver to set.
Use LogisticRegression with the one-vs-rest (`ovr`) multiclass setting and the `liblinear` solver to train.

In [None]:
# Train a one-vs-rest ('ovr') logistic regression with the liblinear solver,
# then report its accuracy on the test split.
# NOTE(review): newer scikit-learn releases deprecate the multi_class argument -- confirm against the installed version.

lr = LogisticRegression(solver='liblinear', multi_class='ovr')
# flatten the labels into a 1-D array before fitting
model = lr.fit(X_train, np.asarray(y_train).ravel())

accuracy = model.score(X_test, y_test)
print("Accuracy is {}".format(accuracy))

In [None]:
# Inspect one row of the test data (positional index 100): list the ingredient
# columns that are non-zero for it, then show its true cuisine label.
# The same row is used for both the values and the non-zero mask.
sample_row = X_test.iloc[100]
print(f'ingredients: {sample_row[sample_row != 0].keys()}')
print(f'cuisine: {y_test.iloc[100]}')

In [None]:
# Show how confident the model is about test row 100 by listing the predicted probability of each class

# select the row as a one-row DataFrame (double brackets) so the column names are kept;
# passing a bare NumPy array to a model fitted on a DataFrame makes scikit-learn (1.0+)
# warn that X does not have valid feature names
test = X_test.iloc[[100]]
# predicted probability of every class for that row
proba = model.predict_proba(test)
classes = model.classes_
# one row of probabilities, one column per class
resultdf = pd.DataFrame(data=proba, columns=classes)

# transpose so each class becomes a row, then sort with the most likely class first
topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])
topPrediction.head()

In [None]:
# Predict a cuisine for every test row and print a classification report
# comparing those predictions with the true labels

y_pred = model.predict(X_test)
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)