In [13]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sb
from tensorflow.keras.preprocessing.text import Tokenizer
import re
from tensorflow.keras import models
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.layers import Embedding
from sklearn.model_selection import KFold
from random import randint

In [2]:
# Load the recipe data set; the path is relative to the notebook's working directory.
raw_data_frame = pd.read_json('train.json')
# Preview the first rows (columns shown in the output: id, cuisine, ingredients).
raw_data_frame.head()

Unnamed: 0,id,cuisine,ingredients
0,10259,greek,"[romaine lettuce, black olives, grape tomatoes..."
1,25693,southern_us,"[plain flour, ground pepper, salt, tomatoes, g..."
2,20130,filipino,"[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,22213,indian,"[water, vegetable oil, wheat, salt]"
4,13162,indian,"[black pepper, shallots, cornflour, cayenne pe..."


In [3]:
# Distinct cuisine names found in the 'cuisine' column.
# NOTE(review): the order of this array later defines the class-index -> label
# mapping, so it presumably has to match the order used at training time — confirm.
cuisines = raw_data_frame['cuisine'].unique()
cuisines

array(['greek', 'southern_us', 'filipino', 'indian', 'jamaican',
       'spanish', 'italian', 'mexican', 'chinese', 'british', 'thai',
       'vietnamese', 'cajun_creole', 'brazilian', 'french', 'japanese',
       'irish', 'korean', 'moroccan', 'russian'], dtype=object)

In [4]:
# Punctuation that is turned into a space before tokenisation.
# Written as a raw string so the backslashes reach the regex engine untouched
# (the non-raw form relies on invalid string escape sequences), and compiled
# once instead of on every call.
_REPLACE_BY_SPACE_RE = re.compile(r'[/(){}\[\]\|@,;]')


def text_prepare(ingredient):
    """Normalise a single ingredient string.

    Steps:
      1. Replace each of the characters / ( ) { } [ ] | @ , ; with a space.
      2. Lower-case the text.
      3. Strip leading/trailing whitespace and collapse every run of
         whitespace to a single space, so replaced punctuation does not
         leave double spaces behind.

    Parameters
    ----------
    ingredient : str
        Raw ingredient text.

    Returns
    -------
    str
        The cleaned ingredient; an empty string if nothing but whitespace
        and replaced punctuation was present.
    """
    ingredient = _REPLACE_BY_SPACE_RE.sub(' ', ingredient)

    # str.split() without arguments drops empty pieces, which both strips the
    # ends and collapses inner whitespace runs.
    return ' '.join(ingredient.lower().split())
    

def loader(ingredient_list):
    """Clean every ingredient in a recipe and merge them into one string.

    Each element of `ingredient_list` is passed through `text_prepare`; the
    cleaned pieces are joined with single spaces, in their original order.
    An empty list yields an empty string.
    """
    cleaned = []
    for item in ingredient_list:
        cleaned.append(text_prepare(item))
    return ' '.join(cleaned)

In [5]:
# Collapse each recipe's ingredient list into one cleaned, space-separated string.
# Only list-like entries are converted: this cell overwrites its own input
# column, so on a second run the values are already strings, and passing a
# string to `loader` would iterate it character by character and corrupt the
# column. The guard makes the cell safe to re-run.
raw_data_frame["ingredients"] = raw_data_frame["ingredients"].apply(
    lambda entry: loader(entry) if isinstance(entry, (list, tuple)) else entry
)
raw_data_frame["ingredients"][:5]

0    romaine lettuce black olives grape tomatoes ga...
1    plain flour ground pepper salt tomatoes ground...
2    eggs pepper salt mayonaise cooking oil green c...
3                       water vegetable oil wheat salt
4    black pepper shallots cornflour cayenne pepper...
Name: ingredients, dtype: object

In [22]:
# Class index -> cuisine name, numbered in the order the names appear in `cuisines`.
# NOTE(review): presumably this must reproduce the label encoding used when the
# model was trained — confirm against the training notebook.
cuisine_labels = dict(enumerate(cuisines))
cuisine_labels

{0: 'greek',
 1: 'southern_us',
 2: 'filipino',
 3: 'indian',
 4: 'jamaican',
 5: 'spanish',
 6: 'italian',
 7: 'mexican',
 8: 'chinese',
 9: 'british',
 10: 'thai',
 11: 'vietnamese',
 12: 'cajun_creole',
 13: 'brazilian',
 14: 'french',
 15: 'japanese',
 16: 'irish',
 17: 'korean',
 18: 'moroccan',
 19: 'russian'}

In [7]:
# Cleaned ingredient strings as a NumPy array (one entry per recipe row).
ingredients = np.array(raw_data_frame['ingredients'])

In [8]:
# Build the word -> integer-id vocabulary from all cleaned recipes.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(ingredients)

word_index = tokenizer.word_index
# One more than the number of known words.
# NOTE(review): presumably the extra slot accounts for id 0, which the padded
# sequences use as filler — confirm.
vocabulary_size = 1 + len(word_index)

print("Vocabulary Size= ", vocabulary_size)
print("word index", word_index['italian'])

Vocabulary Size=  3065
word index 126


In [16]:
def word_input_form(word):
    """Encode a text as a fixed-length sequence of token ids.

    The text is wrapped in a one-element list, converted to ids with the
    notebook-level `tokenizer`, and padded at the end (padding='post') to a
    length of 40.

    Returns the int32 array produced by `pad_sequences` for that
    single-text batch.

    NOTE(review): texts with more than 40 tokens are cut by `pad_sequences`'
    default truncation policy — confirm this matches how the model was trained.
    """
    sequences = tokenizer.texts_to_sequences([word])
    return pad_sequences(sequences, maxlen=40, padding='post', dtype='int32')
# Sanity check: encode the first recipe and inspect the padded id sequence.
word_input_form(ingredients[0])

array([[314, 138,  13, 128, 339,  18,   4,   1, 104,  25,  79, 489,  50,
        204,  10, 287,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0]], dtype=int32)

In [14]:
# Load the saved Keras model from disk; the path is relative to the working directory.
model = models.load_model('cuisine_prediction.h5')

In [27]:
def predict(word):
    """Return the cuisine name the model predicts for an ingredient text.

    The text is encoded with `word_input_form`, scored by the notebook-level
    `model`, and the index of the highest score is looked up in
    `cuisine_labels`.
    """
    encoded = word_input_form(word)
    scores = model.predict(encoded)
    # argmax over the flattened scores; the input holds a single sample.
    best_index = np.argmax(scores)
    return cuisine_labels[best_index]
# Print one recipe from the data set for reference, then classify a
# hand-written ingredient list.
print(ingredients[4])
predict("penne garlic basil salt black pepper red chilli")

black pepper shallots cornflour cayenne pepper onions garlic paste milk butter salt lemon juice water chili powder passata oil ground cumin boneless chicken skinless thigh garam masala double cream natural yogurt bay leaf


'italian'