## Import Libraries

In [1]:
try:
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns

    import pymongo
    from pymongo import MongoClient
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.metrics.pairwise import cosine_similarity


    import elasticsearch
    from elasticsearch import Elasticsearch
    from elasticsearch import helpers
    print("Loaded ... ... ...")
except Exception as e:
    print("Some Modules are Missing{}".format(e))

Loaded ... ... ...


## Collect data from MongoDB to Python

In [2]:
connect = MongoClient(host="localhost", port=27017)
database = connect["foodDB"]
collection = database["Recipe_data"]


# Printing the data inserted
cursor = collection.find()
recipe_list = [record for record in cursor]
print(recipe_list)

[{'_id': ObjectId('62be8b56ce6d9b0edb8631bb'), 'recipe_name': 'Pan-Fried Zoodles with Sesame and Paneer', 'cuisine': 'Asian', 'diet': 'Low-Fat'}, {'_id': ObjectId('62be8b56ce6d9b0edb8631bc'), 'recipe_name': 'ROKU TONIC', 'cuisine': 'Indian', 'diet': 'Beverage'}, {'_id': ObjectId('62be8b56ce6d9b0edb8631bd'), 'recipe_name': "Sweet 'N' Spicy Mushroom Fry", 'cuisine': 'Asian', 'diet': 'Kid-Friendly'}, {'_id': ObjectId('62be8b56ce6d9b0edb8631be'), 'recipe_name': 'Peanut Butter Choco-Cupcakes', 'cuisine': 'British', 'diet': 'Kid-Friendly'}, {'_id': ObjectId('62be8b56ce6d9b0edb8631bf'), 'recipe_name': 'CHUNCHI PATRA PITHA', 'cuisine': 'Indian', 'diet': 'Vegetarian'}, {'_id': ObjectId('62be8b56ce6d9b0edb8631c0'), 'recipe_name': 'SWEET POTATO BAKED YOGHURT', 'cuisine': 'Indian', 'diet': 'Restaurants'}, {'_id': ObjectId('62be8b56ce6d9b0edb8631c1'), 'recipe_name': 'Tawa fried paneer', 'cuisine': 'Indian', 'diet': 'Vegetarian'}, {'_id': ObjectId('62be8b56ce6d9b0edb8631c2'), 'recipe_name': 'Vanille

## Data Visulization

In [3]:
recipe_data = pd.DataFrame(recipe_list)

In [4]:
recipe_data

Unnamed: 0,_id,recipe_name,cuisine,diet
0,62be8b56ce6d9b0edb8631bb,Pan-Fried Zoodles with Sesame and Paneer,Asian,Low-Fat
1,62be8b56ce6d9b0edb8631bc,ROKU TONIC,Indian,Beverage
2,62be8b56ce6d9b0edb8631bd,Sweet 'N' Spicy Mushroom Fry,Asian,Kid-Friendly
3,62be8b56ce6d9b0edb8631be,Peanut Butter Choco-Cupcakes,British,Kid-Friendly
4,62be8b56ce6d9b0edb8631bf,CHUNCHI PATRA PITHA,Indian,Vegetarian
...,...,...,...,...
2995,62be8b5ace6d9b0edb863d6e,Rich Butter Chicken,Indian,Non-Vegetarian
2996,62be8b5ace6d9b0edb863d6f,Semolina Coconut Pudding,Asian,Festive
2997,62be8b5ace6d9b0edb863d70,Special Orange Cake,Indian,Festive
2998,62be8b5ace6d9b0edb863d71,Shahi Spinach Ke Gole,Indian,Vegetarian


In [5]:
recipe_data.drop('_id',axis=1,inplace=True)

In [6]:
recipe_data.sample(5)

Unnamed: 0,recipe_name,cuisine,diet
1639,Zucchini and Chocolate Muffins,American,Kid-Friendly
1432,Tomato Pickups,Indian,Vegetarian
1384,Zamin Doz Machhli,Indian,Non-Vegetarian
244,Anjeer ke kofte,Indian,Vegetarian
2608,Special Bread Pudding,Indian,Kid-Friendly


### Creating a elastic Search Instance

In [7]:
ENDPOINT = "http://localhost:9200/"
es = Elasticsearch(timeout=600,hosts=ENDPOINT)

  es = Elasticsearch(timeout=600,hosts=ENDPOINT)


In [8]:
es.ping()

  es.ping()


True

In [9]:
recipe_data.isna().sum()

recipe_name    0
cuisine        0
diet           0
dtype: int64

#### We need to convert the data an appropriate format that elastic search can understand

In [10]:
recipe_el = recipe_data.to_dict('records')
recipe_el[0]

{'recipe_name': 'Pan-Fried Zoodles with Sesame and Paneer',
 'cuisine': 'Asian',
 'diet': 'Low-Fat'}

## we need to convert the data into ELK format

In [17]:
def generator(recipe_el):
    for c,line in enumerate(recipe_el):
        yield {
    '_index': 'recipe',
    '_type': '_doc',
    '_id': line.get("show_id",None),
    '_source': {       
       'recipe_name': line.get("recipe_name",""),
       'cuisine':line.get('cuisine',""),
       'diet':line.get('diet',""),
        }
         }
    raise StopIteration

In [18]:
mycustom = generator(recipe_el)

In [19]:
mycustom

<generator object generator at 0x7f7c2ffbf0b0>

In [20]:
next(mycustom)

{'_index': 'recipe',
 '_type': '_doc',
 '_id': None,
 '_source': {'recipe_name': 'Pan-Fried Zoodles with Sesame and Paneer',
  'cuisine': 'Asian',
  'diet': 'Low-Fat'}}

In [21]:
Settings={
   "settings":{
      "number_of_shards":1,
      "number_of_replicas":0
   },
   "mappings":{
      "properties":{
         "recipe_name":{
            "type":"text"
         },
      }
   }
}

In [22]:
IndexName = ''
my = es.indices.create(index='recipe', ignore=[400,404], body=Settings)

  my = es.indices.create(index='recipe', ignore=[400,404], body=Settings)
  my = es.indices.create(index='recipe', ignore=[400,404], body=Settings)
  my = es.indices.create(index='recipe', ignore=[400,404], body=Settings)


In [23]:
try:
    res = helpers.bulk(es, generator(recipe_el))
    print('Working')
except Exception as e:
    pass

  res = helpers.bulk(es, generator(recipe_el))
  res = helpers.bulk(es, generator(recipe_el))


In [11]:
def convert_elasticquery(text):
    query = {
      "_source": ["recipe_name"]
      ,"size": 1
      ,"query":{
        "match": {
          "recipe_name": text
        }
      }
    }

    #es = Elasticsearch(timeout=600, hosts=os.getenv("ENDPOINT"))
    res = es.search(index='recipe', body=query)

    title = [x['_source']  for x in res['hits']['hits']]
    return title[0]['recipe_name']

In [12]:
convert_elasticquery('paneer')

  res = es.search(index='recipe', body=query)
  res = es.search(index='recipe', body=query)


'Paneer Gobi'

### From above Observation 2997 unique recipe name 

In [13]:
recipe_data['cuisine'].value_counts(normalize=True)

Indian            0.779667
Continental       0.066000
Anglo-Indian      0.051333
American          0.041333
Chinese           0.013333
Italian           0.009667
Asian             0.005667
Mexican           0.004667
Malaysian         0.003667
Arab              0.002667
English           0.002667
Thai              0.002667
French            0.002000
Indo-Chinese      0.001667
Mediterranean     0.001667
Sri-Lankan        0.001333
African           0.001333
Indo-American     0.001333
Afghani           0.001000
South-American    0.000667
Middle-East       0.000667
Russian           0.000667
Pakistani         0.000667
Japanese          0.000667
Singapore         0.000333
Iranian           0.000333
Turkish           0.000333
Goan              0.000333
British           0.000333
Tibetan           0.000333
Lebanese          0.000333
Indonesian        0.000333
Spanish           0.000333
Name: cuisine, dtype: float64

### From above observation 80% of recipe cuisine are Indian 

In [14]:
recipe_data.recipe_name.value_counts()

Apple Cobbler                               2
Amaranth Stem and Jack Fruit Seeds Palya    2
Sugar free Milk Chocolate                   1
Egg And Cauliflower Casserole               1
Veg Boiled Yong Tow Foo                     1
                                           ..
Green Onion Pancakes                        1
Anglo-Indian Beef And Vegetable Stew        1
Agra Petha                                  1
A Quick and Simple Mutton Lamb Curry        1
Rassewale Aloo Ki Sabzi                     1
Name: recipe_name, Length: 2998, dtype: int64

### Frome above Observation 2997 unique product we have

In [15]:
recipe_data['diet'].value_counts()

Vegetarian        1493
Kid-Friendly       640
Non-Vegetarian     521
Festive            311
Vegan               13
Low-Calorie          7
High-Protein         7
Low-Fat              3
Diet                 3
Beverage             1
Restaurants          1
Name: diet, dtype: int64

### Creating Model

In [16]:
recipe_data.to_csv('recipe_data_scrap.csv')

In [17]:
new_df = recipe_data.copy()

In [18]:
new_df['cuisine_diet'] = new_df['cuisine'] + ' ' +new_df['diet']

In [19]:
new_df = new_df[['recipe_name','cuisine_diet']]
new_df.head(5)

Unnamed: 0,recipe_name,cuisine_diet
0,Pan-Fried Zoodles with Sesame and Paneer,Asian Low-Fat
1,ROKU TONIC,Indian Beverage
2,Sweet 'N' Spicy Mushroom Fry,Asian Kid-Friendly
3,Peanut Butter Choco-Cupcakes,British Kid-Friendly
4,CHUNCHI PATRA PITHA,Indian Vegetarian


In [20]:
new_df['cuisine_diet'] = new_df['cuisine_diet'].apply(lambda x: x.lower())
new_df.head()

Unnamed: 0,recipe_name,cuisine_diet
0,Pan-Fried Zoodles with Sesame and Paneer,asian low-fat
1,ROKU TONIC,indian beverage
2,Sweet 'N' Spicy Mushroom Fry,asian kid-friendly
3,Peanut Butter Choco-Cupcakes,british kid-friendly
4,CHUNCHI PATRA PITHA,indian vegetarian


In [115]:
new_df.to_csv('recipe_data.csv')

In [21]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   recipe_name   3000 non-null   object
 1   cuisine_diet  3000 non-null   object
dtypes: object(2)
memory usage: 47.0+ KB


In [22]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(max_features=5000, stop_words="english")
vectors = vectorizer.fit_transform(new_df['cuisine_diet']).toarray()

In [23]:
vectors

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.76288608],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.76288608]])

In [24]:
vectorizer.get_feature_names()

['afghani',
 'african',
 'american',
 'anglo',
 'arab',
 'asian',
 'beverage',
 'british',
 'calorie',
 'chinese',
 'continental',
 'diet',
 'east',
 'english',
 'fat',
 'festive',
 'french',
 'friendly',
 'goan',
 'high',
 'indian',
 'indo',
 'indonesian',
 'iranian',
 'italian',
 'japanese',
 'kid',
 'lankan',
 'lebanese',
 'low',
 'malaysian',
 'mediterranean',
 'mexican',
 'middle',
 'non',
 'pakistani',
 'protein',
 'restaurants',
 'russian',
 'singapore',
 'south',
 'spanish',
 'sri',
 'thai',
 'tibetan',
 'turkish',
 'vegan',
 'vegetarian']

In [25]:
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(vectors)

In [26]:
res = sorted(enumerate(similarity[0]),reverse=True, key = lambda x:x[1])[1:6]
for i in res:
        print(new_df.iloc[i[0]].recipe_name)

Diet Salad
Low Fat Diet Salad
Asian Noodles
Soya Grill
Vegetable Tofu Pancakes


In [27]:
new_df[new_df['recipe_name'] == 'Tuppa Dosa'].index[0]

1762

In [28]:
def recommend(recipe):
    """Creating recommendation function to 
       recommend most five recipe_name from 
       user input
    """
    recipe_index = new_df[new_df['recipe_name'] == recipe].index[0]
    distances = similarity[recipe_index]
    recipe_list = sorted(enumerate(distances),reverse=True, key = lambda x:x[1])[1:11]
    
    for i in recipe_list:
        print(new_df.iloc[i[0]].recipe_name)

In [29]:
recommend('Rich Butter Chicken')

Ooty Pumpkin Pie
Railway Mutton Curry
Home-Style Dahi Machcha
Tandoori Salmon
Egg Curry in Coconut Masala
Chicken Curry with Sour Cream
Ambot Tik
Grilled Dahi Chicken
Akoori (Masala Scrambled Eggs)
Boneless Chicken Delight


In [30]:
recommend('Rassewale Aloo Ki Sabzi')

Tawa fried paneer
Vanillekipfer Cookies
Bread Pakoda
Cream Rolls
Apple Cobbler
Sulemani Chai
BLUEBERRY PHIRNI
Kunafa Recipe
Dahi Gujiya
MIXED FRUIT GUJJIAS


In [31]:
new_df['recipe_name'].values

array(['Pan-Fried Zoodles with Sesame and Paneer', 'ROKU TONIC',
       "Sweet 'N' Spicy Mushroom Fry", ..., 'Special Orange Cake',
       'Shahi Spinach Ke Gole', 'Rassewale Aloo Ki Sabzi'], dtype=object)

In [38]:
user_input = input("Enter Product you are looking for :")

print("---------------------")
print("\n")
recipe_name = convert_elasticquery(user_input)
recipe_product = recommend(recipe_name)


Enter Product you are looking for :chicken
---------------------


Ooty Pumpkin Pie
Railway Mutton Curry
Home-Style Dahi Machcha
Tandoori Salmon
Egg Curry in Coconut Masala
Chicken Curry with Sour Cream
Ambot Tik
Grilled Dahi Chicken
Akoori (Masala Scrambled Eggs)
Boneless Chicken Delight


  res = es.search(index='recipe', body=query)
  res = es.search(index='recipe', body=query)


In [132]:
import pickle

In [133]:
pickle.dump(similarity,open('similarity.pkl','wb'))