## Recipe finder
### by Abilash Ramesh

### Load all packages

In [1]:
import pandas as pd
import numpy as np
import json
import codecs
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier


### Load recipe dataset

In [2]:
def get_data(filename):
    """Load the recipe JSON file and add a flattened ingredient-text column.

    Parameters
    ----------
    filename
        Path handed straight to ``pd.read_json``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with an extra ``'ing'`` column in which each row's
        ``'ingredients'`` sequence of strings is joined with single spaces.
    """
    recipes = pd.read_json(filename)
    # One text "document" per recipe, ready to be fed to a text vectorizer.
    recipes['ing'] = [' '.join(items) for items in recipes['ingredients']]
    return recipes


### Get ingredients from user

In [3]:
def get_user_ing(string, df):
    """Return a new frame with the user's ingredient text appended as the last row.

    Parameters
    ----------
    string
        The user's ingredients as one piece of text (e.g. ``'thyme, basil'``).
    df
        Recipe frame; it is not modified.

    Returns
    -------
    pandas.DataFrame
        ``df`` plus one trailing row whose ``'ing'`` value is ``string``; every
        other column of that row is missing (NaN). The index is renumbered
        ``0..len(df)``.
    """
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # way to add a row and yields the same result.
    user_row = pd.DataFrame([{'ing': string}])
    return pd.concat([df, user_row], ignore_index=True)


### Train model and vectorizer

In [4]:
def train_data(df, n):
    """Fit a TF-IDF vectorizer and a k-nearest-neighbours cuisine classifier.

    The last row of ``df`` is treated as the query (it is excluded from
    training); all earlier rows are labelled training recipes.

    Parameters
    ----------
    df
        Frame with an ``'ing'`` text column and a ``'cuisine'`` label column.
    n
        Number of neighbours given to ``KNeighborsClassifier``.

    Returns
    -------
    tuple
        ``(X_test, model)`` where ``X_test`` is the one-row sparse TF-IDF
        vector of the last row of ``df`` and ``model`` is the fitted classifier.
    """
    vectorizer = TfidfVectorizer(use_idf=True, smooth_idf=True, stop_words='english', max_features=4000)
    # Vocabulary and IDF weights are learned from every row, query included.
    ing_vect = vectorizer.fit_transform(df['ing'].values)

    # Stay sparse: .todense() returns an np.matrix, which recent scikit-learn
    # releases refuse as estimator input, and it materialises the whole
    # (rows x 4000) matrix in memory. KNN accepts sparse input directly.
    X_train = ing_vect[:-1]
    y_train = df['cuisine'].iloc[:-1]
    X_test = ing_vect[-1:]  # slice (not an int index) so the query stays 2-D

    model = KNeighborsClassifier(n_neighbors=n, weights='uniform', algorithm='auto', metric='minkowski')
    model.fit(X_train, y_train)
    return X_test, model


### Obtain results for the given set of ingredients

In [5]:
def get_results(test, model, n):
    """Print the predicted cuisine for ``test`` and up to ``n`` nearest recipes.

    Parameters
    ----------
    test
        Single-row feature vector accepted by ``model``.
    model
        Fitted classifier exposing ``classes_``, ``predict``,
        ``predict_proba`` and ``kneighbors``.
    n
        Maximum number of neighbours to list; at most as many as
        ``model.kneighbors`` returns are printed.

    Returns
    -------
    None
        Results are written to standard output only.
    """
    top_cuisine = model.predict(test)[0]
    class_probs = model.predict_proba(test)[0]
    distances, indices = model.kneighbors(test)

    # Extract a true scalar: formatting a 1-element array with %f depends on
    # NumPy's deprecated array-to-scalar conversion.
    class_idx = int(np.flatnonzero(model.classes_ == top_cuisine)[0])
    top_prob = float(class_probs[class_idx]) * 100
    print ("The model predicts that the ingredients resembles %s (%f resemblence)\n" %(top_cuisine, top_prob))

    # NOTE(review): kneighbors returns (distances, indices), so the number
    # labelled "probable match" is a distance -- smaller means closer.
    for recipe_idx, distance in zip(indices[0][:n], distances[0][:n]):
        print ('Recipe No: %d (%f probable match)'%(recipe_idx, distance))

### The closest type of cuisine and the most similar recipes are displayed
You will need to re-run the cells below every time you want to find the cuisine/recipes for a new set of ingredients.

In [6]:
# Inputs for this run: dataset location, the user's ingredients, and k for KNN.
# NOTE(review): the dataset path is an absolute, machine-specific location.
RECIPES_JSON = '/Users/anrame/Documents/Spring 2020/Text Analytics/Project 3/yummly.json'
USER_INGREDIENTS = 'thyme, basil'
N_NEIGHBORS = 11

# Load the recipes, append the query as the final row, then fit the model.
# `df` (query row included) is reused by later cells.
df = get_user_ing(USER_INGREDIENTS, get_data(RECIPES_JSON))
test, mod = train_data(df, N_NEIGHBORS)

In [7]:
# Report the predicted cuisine and the 5 closest recipes for the query.
get_results(test=test, model=mod, n=5)

The model predicts that the ingredients resembles italian (63.636364 resemblence)

Recipe No: 38052 (0.995421 probable match)
Recipe No: 33387 (1.014186 probable match)
Recipe No: 16446 (1.042872 probable match)
Recipe No: 21589 (1.074481 probable match)
Recipe No: 24719 (1.077459 probable match)


## Bonus

## Classification using MLP classifier

In [8]:
from sklearn.neural_network import MLPClassifier

In [9]:
def get_results2(test, model):
    """Print the cuisine predicted for ``test`` and its predicted probability.

    Parameters
    ----------
    test
        Single-row feature vector accepted by ``model``.
    model
        Fitted classifier exposing ``classes_``, ``predict`` and
        ``predict_proba``.

    Returns
    -------
    None
        The result is written to standard output only.
    """
    top_cuisine = model.predict(test)[0]
    class_probs = model.predict_proba(test)[0]

    # Extract a true scalar: formatting a 1-element array with %f depends on
    # NumPy's deprecated array-to-scalar conversion.
    class_idx = int(np.flatnonzero(model.classes_ == top_cuisine)[0])
    top_prob = float(class_probs[class_idx]) * 100
    print ("The model predicts that the ingredients resembles %s (%f resemblence)\n" %(top_cuisine, top_prob))

In [13]:
def train_data2(df, random_state=None):
    """Fit a TF-IDF vectorizer and an MLP cuisine classifier.

    The last row of ``df`` is treated as the query (it is excluded from
    training); all earlier rows are labelled training recipes.

    Parameters
    ----------
    df
        Frame with an ``'ing'`` text column and a ``'cuisine'`` label column.
    random_state
        Optional seed forwarded to ``MLPClassifier`` so repeated runs give the
        same model. The default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(X_test, model)`` where ``X_test`` is the one-row sparse TF-IDF
        vector of the last row of ``df`` and ``model`` is the fitted classifier.
    """
    vectorizer = TfidfVectorizer(use_idf=True, smooth_idf=True, stop_words='english', max_features=4000)
    # Vocabulary and IDF weights are learned from every row, query included.
    ing_vect = vectorizer.fit_transform(df['ing'].values)

    # Stay sparse: .todense() returns an np.matrix, which recent scikit-learn
    # releases refuse as estimator input, and it materialises the whole
    # (rows x 4000) matrix in memory. MLPClassifier accepts sparse input.
    X_train = ing_vect[:-1]
    y_train = df['cuisine'].iloc[:-1]
    X_test = ing_vect[-1:]  # slice (not an int index) so the query stays 2-D

    model = MLPClassifier(hidden_layer_sizes=(5, ), activation='relu', solver='adam', alpha=0.001,
                          random_state=random_state)
    model.fit(X_train, y_train)
    return X_test, model



In [14]:
# Train the MLP on the same frame as the KNN run; the last row of `df` is the query.
test2, mod2 = train_data2(df=df)



In [15]:
# Report the cuisine the MLP predicts for the query.
get_results2(test=test2, model=mod2)

The model predicts that the ingredients resembles southern_us (78.502856 resemblence)

