In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the preprocessed restaurant table; the Evaluate_Model_* functions read it
# through this module-level `df`.
df = pd.read_csv("ProcessedData.csv")
# Preview the first five rows.
df.head()

Unnamed: 0,Name,Cuisines,Area,Full_Address,AverageCost,Total Reviews,Total Ratings,Cost Category,Features,Testing Features,bag_of_words
0,Jyoti Vihar,south indian,Camac Street Area,"3A/1, Ho Chi Minh Sarani, Camac Street Area, K...",300,6428,4.1,midrange,homedelivery takeaway vegonly indoorseating,homedelivery takeaway vegonly indoorseating mi...,south indian homedelivery takeaway vegonly ind...
1,WOW! Momo,"momos, fast food, tibetan",Park Street Area,"57, Park Street, Beside Vodafone Store, Park S...",350,11569,4.1,midrange,homedelivery takeaway indoorseating,homedelivery takeaway indoorseating midrange,momos fast food tibetan homedelivery takeaway ...
2,Zam Zam,"biryani, rolls",Park Circus Area,"28/A, Syed Amir Ali Avenue, Park Circus Area, ...",500,85001,4.3,expensive,homedelivery takeaway indoorseating,homedelivery takeaway indoorseating expensive,biryani rolls homedelivery takeaway indoorseat...
3,Daily Bhoj,bengali,Topsia,"139/1G, Tljala Road, Topsia, Kolkata",150,2260,3.9,cheapeats,homedelivery,homedelivery cheapeats,bengali homedelivery cheapeats
4,Behnam,"biryani, north indian, mughlai, rolls, kebab, ...",Topsia,"139/1G, Tiljala Road, Near OYO Townhouse 229, ...",200,151,2.9,midrange,homedelivery takeaway,homedelivery takeaway midrange,biryani north indian mughlai rolls kebab awadh...


In [3]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer

In [4]:
# NOTE(review): `lemmatizer` is not referenced by process_sentences or any other
# cell visible here — confirm whether lemmatization was intended or drop it.
lemmatizer = WordNetLemmatizer()
# English stop words held in a set for fast membership tests in process_sentences.
stop_words = set(stopwords.words('english'))
def process_sentences(text):
    """Strip stop words and non-alphabetic tokens from ``text``.

    The text is split with ``word_tokenize``. A token survives only when
    ``str.isalpha()`` is true for it and it is absent from the module-level
    ``stop_words`` set. The membership test is exact, so callers that want
    case-insensitive filtering must lowercase the text beforehand.

    Returns the surviving tokens, in their original order, joined by single
    spaces (an empty string when nothing survives).
    """
    kept_tokens = [
        token
        for token in word_tokenize(text)
        if token.isalpha() and token not in stop_words
    ]
    return ' '.join(kept_tokens)

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import manhattan_distances


In [6]:
def Evaluate_Model_Euclidean(description,Area):
    """Rank the restaurants of one area by Euclidean closeness to a text query.

    A TF-IDF vocabulary is fitted on the ``bag_of_words`` column of the rows of
    the module-level ``df`` whose ``Area`` equals ``Area``. The query is
    lower-cased, cleaned with ``process_sentences`` and projected into the same
    space. Each row is scored as ``1 / (1 + euclidean_distance)``, so scores
    fall in (0, 1] and a larger score means a closer match.

    Parameters
    ----------
    description : str
        Free-text query (cuisine, features, cost category, ...).
    Area : str
        Value matched exactly against the ``Area`` column.

    Returns
    -------
    pandas.DataFrame
        At most five rows, best match first, with the columns ``Name``,
        ``Area``, ``AverageCost``, ``Cuisines``, ``Testing Features``,
        ``Total Ratings`` and ``similarity``.

    Raises
    ------
    ValueError
        If no row of ``df`` has the requested ``Area``.
    """
    # Normalise the query: lowercase, drop stop words and non-alphabetic tokens.
    description = process_sentences(description.lower()).strip()

    # Filter first, then copy: only the matching rows are duplicated, and the
    # result is an independent frame, so adding a column below neither touches
    # `df` nor triggers pandas' SettingWithCopyWarning.
    data = df[df['Area'] == Area].copy()
    if data.empty:
        # Without this guard the vectorizer fails with an unrelated
        # "empty vocabulary" ValueError.
        raise ValueError("No rows in df with Area == {!r}".format(Area))

    # Fit the TF-IDF vocabulary on this area's restaurants only.
    tfidfvec = TfidfVectorizer()
    vec = tfidfvec.fit(data["bag_of_words"])
    features = vec.transform(data["bag_of_words"])
    # Project the query into the fitted vocabulary.
    description_vector = vec.transform([description])

    # Turn distance into a similarity so that "larger is better".
    distance = euclidean_distances(description_vector, features)
    similarity = 1/(1+distance)
    data['similarity'] = similarity[0]

    ranked = data.sort_values(by='similarity', ascending=False)
    return ranked[['Name', 'Area', 'AverageCost', 'Cuisines','Testing Features', 'Total Ratings', 'similarity']].head(5)


In [7]:
def Evaluate_Model_Manhattan(description,Area):
    """Rank the restaurants of one area by Manhattan closeness to a text query.

    A TF-IDF vocabulary is fitted on the ``bag_of_words`` column of the rows of
    the module-level ``df`` whose ``Area`` equals ``Area``. The query is
    lower-cased, cleaned with ``process_sentences`` and projected into the same
    space. Each row is scored as ``1 / (1 + manhattan_distance)``, so scores
    fall in (0, 1] and a larger score means a closer match.

    Parameters
    ----------
    description : str
        Free-text query (cuisine, features, cost category, ...).
    Area : str
        Value matched exactly against the ``Area`` column.

    Returns
    -------
    pandas.DataFrame
        At most five rows, best match first, with the columns ``Name``,
        ``Area``, ``AverageCost``, ``Cuisines``, ``Testing Features``,
        ``Total Ratings`` and ``similarity``.

    Raises
    ------
    ValueError
        If no row of ``df`` has the requested ``Area``.
    """
    # Normalise the query: lowercase, drop stop words and non-alphabetic tokens.
    description = process_sentences(description.lower()).strip()

    # Filter first, then copy: only the matching rows are duplicated, and the
    # result is an independent frame, so adding a column below neither touches
    # `df` nor triggers pandas' SettingWithCopyWarning.
    data = df[df['Area'] == Area].copy()
    if data.empty:
        # Without this guard the vectorizer fails with an unrelated
        # "empty vocabulary" ValueError.
        raise ValueError("No rows in df with Area == {!r}".format(Area))

    # Fit the TF-IDF vocabulary on this area's restaurants only.
    tfidfvec = TfidfVectorizer()
    vec = tfidfvec.fit(data["bag_of_words"])
    features = vec.transform(data["bag_of_words"])
    # Project the query into the fitted vocabulary.
    description_vector = vec.transform([description])

    # Turn distance into a similarity so that "larger is better".
    distance = manhattan_distances(description_vector, features)
    similarity = 1/(1+distance)
    data['similarity'] = similarity[0]

    ranked = data.sort_values(by='similarity', ascending=False)
    return ranked[['Name', 'Area', 'AverageCost', 'Cuisines','Testing Features', 'Total Ratings', 'similarity']].head(5)


In [8]:
def Evaluate_Model_LinearKernel(description,Area):
    """Rank the restaurants of one area by TF-IDF dot product with a text query.

    A TF-IDF vocabulary is fitted on the ``bag_of_words`` column of the rows of
    the module-level ``df`` whose ``Area`` equals ``Area``. The query is
    lower-cased, cleaned with ``process_sentences`` and projected into the same
    space. Each row is scored with ``linear_kernel`` (a plain dot product).

    ``TfidfVectorizer`` is left at its defaults; with the default ``norm='l2'``
    every non-zero vector has unit length, so this score coincides with cosine
    similarity.

    Parameters
    ----------
    description : str
        Free-text query (cuisine, features, cost category, ...).
    Area : str
        Value matched exactly against the ``Area`` column.

    Returns
    -------
    pandas.DataFrame
        At most five rows, best match first, with the columns ``Name``,
        ``Area``, ``AverageCost``, ``Cuisines``, ``Testing Features``,
        ``Total Ratings`` and ``similarity``.

    Raises
    ------
    ValueError
        If no row of ``df`` has the requested ``Area``.
    """
    # Normalise the query: lowercase, drop stop words and non-alphabetic tokens.
    description = process_sentences(description.lower()).strip()

    # Filter first, then copy: only the matching rows are duplicated, and the
    # result is an independent frame, so adding a column below neither touches
    # `df` nor triggers pandas' SettingWithCopyWarning.
    data = df[df['Area'] == Area].copy()
    if data.empty:
        # Without this guard the vectorizer fails with an unrelated
        # "empty vocabulary" ValueError.
        raise ValueError("No rows in df with Area == {!r}".format(Area))

    # Fit the TF-IDF vocabulary on this area's restaurants only.
    tfidfvec = TfidfVectorizer()
    vec = tfidfvec.fit(data["bag_of_words"])
    features = vec.transform(data["bag_of_words"])
    # Project the query into the fitted vocabulary.
    description_vector = vec.transform([description])

    # One row of scores: query versus every restaurant in the area.
    similarity = linear_kernel(description_vector, features)
    data['similarity'] = similarity[0]

    ranked = data.sort_values(by='similarity', ascending=False)
    return ranked[['Name', 'Area', 'AverageCost', 'Cuisines','Testing Features', 'Total Ratings', 'similarity']].head(5)

In [9]:
def Evaluate_Model_CosineSimilarity(description,Area):
    """Rank the restaurants of one area by cosine similarity to a text query.

    A TF-IDF vocabulary is fitted on the ``bag_of_words`` column of the rows of
    the module-level ``df`` whose ``Area`` equals ``Area``. The query is
    lower-cased, cleaned with ``process_sentences`` and projected into the same
    space. Each row is scored with ``cosine_similarity``; a query with no term
    in the fitted vocabulary scores 0 against every row.

    Parameters
    ----------
    description : str
        Free-text query (cuisine, features, cost category, ...).
    Area : str
        Value matched exactly against the ``Area`` column.

    Returns
    -------
    pandas.DataFrame
        At most five rows, best match first, with the columns ``Name``,
        ``Area``, ``AverageCost``, ``Cuisines``, ``Testing Features``,
        ``Total Ratings`` and ``similarity``.

    Raises
    ------
    ValueError
        If no row of ``df`` has the requested ``Area``.
    """
    # Normalise the query: lowercase, drop stop words and non-alphabetic tokens.
    description = process_sentences(description.lower()).strip()

    # Filter first, then copy: only the matching rows are duplicated, and the
    # result is an independent frame, so adding a column below neither touches
    # `df` nor triggers pandas' SettingWithCopyWarning.
    data = df[df['Area'] == Area].copy()
    if data.empty:
        # Without this guard the vectorizer fails with an unrelated
        # "empty vocabulary" ValueError.
        raise ValueError("No rows in df with Area == {!r}".format(Area))

    # Fit the TF-IDF vocabulary on this area's restaurants only.
    tfidfvec = TfidfVectorizer()
    vec = tfidfvec.fit(data["bag_of_words"])
    features = vec.transform(data["bag_of_words"])
    # Project the query into the fitted vocabulary.
    description_vector = vec.transform([description])

    # One row of scores: query versus every restaurant in the area.
    similarity = cosine_similarity(description_vector, features)
    data['similarity'] = similarity[0]

    ranked = data.sort_values(by='similarity', ascending=False)
    return ranked[['Name', 'Area', 'AverageCost', 'Cuisines','Testing Features', 'Total Ratings', 'similarity']].head(5)

In [10]:
def Evaluate_Model_TFIDF_MaxFeatures(description,Area,max_features=2):
    """Rank one area's restaurants by cosine similarity over a capped TF-IDF vocabulary.

    Works like the plain cosine-similarity model, but the ``TfidfVectorizer``
    keeps only ``max_features`` terms (per scikit-learn, the most frequent ones
    in the fitted corpus). A query that contains none of the retained terms
    becomes an all-zero vector and scores 0 against every row, so very small
    caps make the ranking uninformative.

    Parameters
    ----------
    description : str
        Free-text query (cuisine, features, cost category, ...).
    Area : str
        Value matched exactly against the ``Area`` column.
    max_features : int or None, optional
        Vocabulary size cap forwarded to ``TfidfVectorizer``. Defaults to 2,
        the value this experiment was originally hard-coded with; ``None``
        removes the cap.

    Returns
    -------
    pandas.DataFrame
        At most five rows, best match first, with the columns ``Name``,
        ``Area``, ``AverageCost``, ``Cuisines``, ``Testing Features``,
        ``Total Ratings`` and ``similarity``.

    Raises
    ------
    ValueError
        If no row of ``df`` has the requested ``Area``.
    """
    # Normalise the query: lowercase, drop stop words and non-alphabetic tokens.
    description = process_sentences(description.lower()).strip()

    # Filter first, then copy: only the matching rows are duplicated, and the
    # result is an independent frame, so adding a column below neither touches
    # `df` nor triggers pandas' SettingWithCopyWarning.
    data = df[df['Area'] == Area].copy()
    if data.empty:
        # Without this guard the vectorizer fails with an unrelated
        # "empty vocabulary" ValueError.
        raise ValueError("No rows in df with Area == {!r}".format(Area))

    # Fit a size-capped TF-IDF vocabulary on this area's restaurants only.
    tfidfvec = TfidfVectorizer(max_features=max_features)
    vec = tfidfvec.fit(data["bag_of_words"])
    features = vec.transform(data["bag_of_words"])
    # Project the query into the fitted vocabulary.
    description_vector = vec.transform([description])

    # One row of scores: query versus every restaurant in the area.
    similarity = cosine_similarity(description_vector, features)
    data['similarity'] = similarity[0]

    ranked = data.sort_values(by='similarity', ascending=False)
    return ranked[['Name', 'Area', 'AverageCost', 'Cuisines','Testing Features', 'Total Ratings', 'similarity']].head(5)

In [11]:
def Evaluate_Model_CountVec(description,Area):
    """Rank the restaurants of one area by cosine similarity over raw term counts.

    A ``CountVectorizer`` (term counts, no IDF weighting) is fitted on the
    ``bag_of_words`` column of the rows of the module-level ``df`` whose
    ``Area`` equals ``Area``. The query is lower-cased, cleaned with
    ``process_sentences`` and projected into the same space. Each row is scored
    with ``cosine_similarity``.

    Parameters
    ----------
    description : str
        Free-text query (cuisine, features, cost category, ...).
    Area : str
        Value matched exactly against the ``Area`` column.

    Returns
    -------
    pandas.DataFrame
        At most five rows, best match first, with the columns ``Name``,
        ``Area``, ``AverageCost``, ``Cuisines``, ``Testing Features``,
        ``Total Ratings`` and ``similarity``.

    Raises
    ------
    ValueError
        If no row of ``df`` has the requested ``Area``.
    """
    # Normalise the query: lowercase, drop stop words and non-alphabetic tokens.
    description = process_sentences(description.lower()).strip()

    # Filter first, then copy: only the matching rows are duplicated, and the
    # result is an independent frame, so adding a column below neither touches
    # `df` nor triggers pandas' SettingWithCopyWarning.
    data = df[df['Area'] == Area].copy()
    if data.empty:
        # Without this guard the vectorizer fails with an unrelated
        # "empty vocabulary" ValueError.
        raise ValueError("No rows in df with Area == {!r}".format(Area))

    # Fit a bag-of-words count vocabulary on this area's restaurants only.
    contVect = CountVectorizer()
    vec = contVect.fit(data["bag_of_words"])
    features = vec.transform(data["bag_of_words"])
    # Project the query into the fitted vocabulary.
    description_vector = vec.transform([description])

    # One row of scores: query versus every restaurant in the area.
    similarity = cosine_similarity(description_vector, features)
    data['similarity'] = similarity[0]

    ranked = data.sort_values(by='similarity', ascending=False)
    return ranked[['Name', 'Area', 'AverageCost', 'Cuisines','Testing Features', 'Total Ratings', 'similarity']].head(5)

### Testing the models

### Test Case 1

In [12]:
# User input for this test case.
user_choice_cuisine = 'momos biryani'
user_choice_Features = 'indoorseating'
user_choice_CostCategory = 'midrange'
user_choice_Area = 'Park Street Area'

# The query text is cuisine, features and cost category separated by single
# spaces; the area is passed to the models separately as a row filter.
description = ' '.join(
    [user_choice_cuisine, user_choice_Features, user_choice_CostCategory]
)

In [13]:
# Top-5 matches scored as 1 / (1 + Euclidean distance) over TF-IDF vectors.
Evaluate_Model_Euclidean(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
150,New Noodle King,Park Street Area,400,"chinese, seafood, momos",homedelivery takeaway indoorseating midrange,3.9,0.53573
631,Noodle Time,Park Street Area,400,"chinese, momos, seafood",homedelivery takeaway indoorseating midrange,4.0,0.53573
1,WOW! Momo,Park Street Area,350,"momos, fast food, tibetan",homedelivery takeaway indoorseating midrange,4.1,0.509509
261,The Biryani Mahal,Park Street Area,400,"biryani, north indian",homedelivery takeaway indoorseating midrange,3.5,0.507606
10,KFC,Park Street Area,400,"burger, fast food, biryani, desserts, beverages",homedelivery takeaway indoorseating midrange,4.2,0.473708


In [14]:

# Top-5 matches scored as 1 / (1 + Manhattan distance) over TF-IDF vectors.
Evaluate_Model_Manhattan(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
631,Noodle Time,Park Street Area,400,"chinese, momos, seafood",homedelivery takeaway indoorseating midrange,4.0,0.344728
150,New Noodle King,Park Street Area,400,"chinese, seafood, momos",homedelivery takeaway indoorseating midrange,3.9,0.344728
261,The Biryani Mahal,Park Street Area,400,"biryani, north indian",homedelivery takeaway indoorseating midrange,3.5,0.323873
1,WOW! Momo,Park Street Area,350,"momos, fast food, tibetan",homedelivery takeaway indoorseating midrange,4.1,0.301257
229,Kaaram-The Andhra Kitchen,Park Street Area,500,"biryani, andhra",homedelivery takeaway indoorseating expensive,4.3,0.278262


In [15]:
# Top-5 matches by cosine similarity over TF-IDF vectors.
Evaluate_Model_CosineSimilarity(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
150,New Noodle King,Park Street Area,400,"chinese, seafood, momos",homedelivery takeaway indoorseating midrange,3.9,0.624491
631,Noodle Time,Park Street Area,400,"chinese, momos, seafood",homedelivery takeaway indoorseating midrange,4.0,0.624491
1,WOW! Momo,Park Street Area,350,"momos, fast food, tibetan",homedelivery takeaway indoorseating midrange,4.1,0.53663
261,The Biryani Mahal,Park Street Area,400,"biryani, north indian",homedelivery takeaway indoorseating midrange,3.5,0.529519
10,KFC,Park Street Area,400,"burger, fast food, biryani, desserts, beverages",homedelivery takeaway indoorseating midrange,4.2,0.382833


In [16]:

# Top-5 matches by TF-IDF dot product (linear kernel); the scores below match
# the cosine-similarity output for the same query.
Evaluate_Model_LinearKernel(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
150,New Noodle King,Park Street Area,400,"chinese, seafood, momos",homedelivery takeaway indoorseating midrange,3.9,0.624491
631,Noodle Time,Park Street Area,400,"chinese, momos, seafood",homedelivery takeaway indoorseating midrange,4.0,0.624491
1,WOW! Momo,Park Street Area,350,"momos, fast food, tibetan",homedelivery takeaway indoorseating midrange,4.1,0.53663
261,The Biryani Mahal,Park Street Area,400,"biryani, north indian",homedelivery takeaway indoorseating midrange,3.5,0.529519
10,KFC,Park Street Area,400,"burger, fast food, biryani, desserts, beverages",homedelivery takeaway indoorseating midrange,4.2,0.382833


In [17]:
# TF-IDF capped at 2 vocabulary terms. Every score in the output below is 0.0,
# presumably because no query term is in that 2-term vocabulary, so the order
# of the rows shown carries no information.
Evaluate_Model_TFIDF_MaxFeatures(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
1,WOW! Momo,Park Street Area,350,"momos, fast food, tibetan",homedelivery takeaway indoorseating midrange,4.1,0.0
969,Wow Chicken,Park Street Area,150,"burger, fast food, american, beverages",homedelivery takeaway cheapeats,4.0,0.0
963,Schezwan Kitchen,Park Street Area,100,"north indian, chinese, burger, sandwich",homedelivery takeaway cheapeats,3.9,0.0
955,Waldorf,Park Street Area,1000,"chinese, asian",homedelivery takeaway indoorseating expensive,3.9,0.0
954,Gabbar's Bar & Kitchen,Park Street Area,1500,"north indian, chinese, italian, beverages",homedelivery takeaway indoorseating expensive,4.0,0.0


In [18]:
# Top-5 matches by cosine similarity over raw term counts (CountVectorizer).
Evaluate_Model_CountVec(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
631,Noodle Time,Park Street Area,400,"chinese, momos, seafood",homedelivery takeaway indoorseating midrange,4.0,0.566947
261,The Biryani Mahal,Park Street Area,400,"biryani, north indian",homedelivery takeaway indoorseating midrange,3.5,0.566947
150,New Noodle King,Park Street Area,400,"chinese, seafood, momos",homedelivery takeaway indoorseating midrange,3.9,0.566947
1,WOW! Momo,Park Street Area,350,"momos, fast food, tibetan",homedelivery takeaway indoorseating midrange,4.1,0.53033
10,KFC,Park Street Area,400,"burger, fast food, biryani, desserts, beverages",homedelivery takeaway indoorseating midrange,4.2,0.474342


### Test Case 8

In [19]:
# User input for this test case.
user_choice_cuisine = 'chinese'
user_choice_Features = 'homedelivery'
user_choice_CostCategory = 'expensive'
user_choice_Area = 'Sealdah Area'

# The query text is cuisine, features and cost category separated by single
# spaces; the area is passed to the models separately as a row filter.
description = ' '.join(
    [user_choice_cuisine, user_choice_Features, user_choice_CostCategory]
)


In [20]:
# Top-5 matches by cosine similarity over TF-IDF vectors.
Evaluate_Model_CosineSimilarity(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
922,Foodcourt Restaurant,Sealdah Area,600,"chinese, fast food",homedelivery indoorseating expensive,3.8,0.612212
746,China Express,Sealdah Area,450,"chinese, seafood, beverages",homedelivery takeaway indoorseating expensive,0.0,0.572844
593,Danish Restaurant,Sealdah Area,500,"mughlai, north indian, chinese, kebab, rolls, ...",homedelivery takeaway expensive,4.2,0.360557
4163,Sipsmart Cloud Kitchen,Sealdah Area,150,"chinese, bakery",homedelivery takeaway cheapeats,0.0,0.305055
887,Red Chillies,Sealdah Area,400,"chinese, thai",homedelivery takeaway indoorseating midrange,4.1,0.269554


In [21]:
# Top-5 matches by TF-IDF dot product (linear kernel).
Evaluate_Model_LinearKernel(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
922,Foodcourt Restaurant,Sealdah Area,600,"chinese, fast food",homedelivery indoorseating expensive,3.8,0.612212
746,China Express,Sealdah Area,450,"chinese, seafood, beverages",homedelivery takeaway indoorseating expensive,0.0,0.572844
593,Danish Restaurant,Sealdah Area,500,"mughlai, north indian, chinese, kebab, rolls, ...",homedelivery takeaway expensive,4.2,0.360557
4163,Sipsmart Cloud Kitchen,Sealdah Area,150,"chinese, bakery",homedelivery takeaway cheapeats,0.0,0.305055
887,Red Chillies,Sealdah Area,400,"chinese, thai",homedelivery takeaway indoorseating midrange,4.1,0.269554


In [22]:
# NOTE(review): this repeats the linear-kernel call from the previous cell with
# the same inputs and output; possibly a different model was intended — confirm.
Evaluate_Model_LinearKernel(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
922,Foodcourt Restaurant,Sealdah Area,600,"chinese, fast food",homedelivery indoorseating expensive,3.8,0.612212
746,China Express,Sealdah Area,450,"chinese, seafood, beverages",homedelivery takeaway indoorseating expensive,0.0,0.572844
593,Danish Restaurant,Sealdah Area,500,"mughlai, north indian, chinese, kebab, rolls, ...",homedelivery takeaway expensive,4.2,0.360557
4163,Sipsmart Cloud Kitchen,Sealdah Area,150,"chinese, bakery",homedelivery takeaway cheapeats,0.0,0.305055
887,Red Chillies,Sealdah Area,400,"chinese, thai",homedelivery takeaway indoorseating midrange,4.1,0.269554


In [23]:
# Top-5 matches scored as 1 / (1 + Euclidean distance) over TF-IDF vectors.
Evaluate_Model_Euclidean(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
922,Foodcourt Restaurant,Sealdah Area,600,"chinese, fast food",homedelivery indoorseating expensive,3.8,0.531726
746,China Express,Sealdah Area,450,"chinese, seafood, beverages",homedelivery takeaway indoorseating expensive,0.0,0.519672
593,Danish Restaurant,Sealdah Area,500,"mughlai, north indian, chinese, kebab, rolls, ...",homedelivery takeaway expensive,4.2,0.46929
4163,Sipsmart Cloud Kitchen,Sealdah Area,150,"chinese, bakery",homedelivery takeaway cheapeats,0.0,0.45894
887,Red Chillies,Sealdah Area,400,"chinese, thai",homedelivery takeaway indoorseating midrange,4.1,0.45276


In [24]:
# Top-5 matches scored as 1 / (1 + Manhattan distance) over TF-IDF vectors.
Evaluate_Model_Manhattan(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
922,Foodcourt Restaurant,Sealdah Area,600,"chinese, fast food",homedelivery indoorseating expensive,3.8,0.333143
746,China Express,Sealdah Area,450,"chinese, seafood, beverages",homedelivery takeaway indoorseating expensive,0.0,0.302695
4163,Sipsmart Cloud Kitchen,Sealdah Area,150,"chinese, bakery",homedelivery takeaway cheapeats,0.0,0.287368
2762,Mamonis Kitchen,Sealdah Area,100,bengali,homedelivery cheapeats,4.0,0.275165
887,Red Chillies,Sealdah Area,400,"chinese, thai",homedelivery takeaway indoorseating midrange,4.1,0.259535


### Test Case 9



In [25]:
# User input for this test case. The feature is deliberately left capitalised
# as entered; the model functions lowercase the query themselves.
user_choice_cuisine = 'biryani'
user_choice_Features = 'Indoorseating'
user_choice_CostCategory = 'expensive'
user_choice_Area = 'Baguihati'

# The query text is cuisine, features and cost category separated by single
# spaces; the area is passed to the models separately as a row filter.
description = ' '.join(
    [user_choice_cuisine, user_choice_Features, user_choice_CostCategory]
)

In [26]:
# Top-5 matches by cosine similarity over TF-IDF vectors.
Evaluate_Model_CosineSimilarity(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
2966,Food Junction,Baguihati,500,"chinese, north indian, biryani",homedelivery takeaway indoorseating expensive,3.7,0.786977
1642,The Floresta Restaurant,Baguihati,700,"chinese, north indian",homedelivery takeaway indoorseating expensive,3.7,0.651254
1606,Paramparaa,Baguihati,450,"north indian, chinese",homedelivery takeaway indoorseating expensive,4.0,0.651254
3185,Master Chef Pizza,Baguihati,500,"fast food, pizza",homedelivery takeaway indoorseating expensive,3.6,0.605871
3118,New Arsalan Biryani House,Baguihati,200,biryani,homedelivery takeaway indoorseating midrange,0.0,0.562909


In [27]:
# Top-5 matches by TF-IDF dot product (linear kernel).
Evaluate_Model_LinearKernel(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
2966,Food Junction,Baguihati,500,"chinese, north indian, biryani",homedelivery takeaway indoorseating expensive,3.7,0.786977
1642,The Floresta Restaurant,Baguihati,700,"chinese, north indian",homedelivery takeaway indoorseating expensive,3.7,0.651254
1606,Paramparaa,Baguihati,450,"north indian, chinese",homedelivery takeaway indoorseating expensive,4.0,0.651254
3185,Master Chef Pizza,Baguihati,500,"fast food, pizza",homedelivery takeaway indoorseating expensive,3.6,0.605871
3118,New Arsalan Biryani House,Baguihati,200,biryani,homedelivery takeaway indoorseating midrange,0.0,0.562909


In [28]:
# Top-5 matches scored as 1 / (1 + Euclidean distance) over TF-IDF vectors.
Evaluate_Model_Euclidean(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
2966,Food Junction,Baguihati,500,"chinese, north indian, biryani",homedelivery takeaway indoorseating expensive,3.7,0.605062
1642,The Floresta Restaurant,Baguihati,700,"chinese, north indian",homedelivery takeaway indoorseating expensive,3.7,0.544912
1606,Paramparaa,Baguihati,450,"north indian, chinese",homedelivery takeaway indoorseating expensive,4.0,0.544912
3185,Master Chef Pizza,Baguihati,500,"fast food, pizza",homedelivery takeaway indoorseating expensive,3.6,0.529706
3118,New Arsalan Biryani House,Baguihati,200,biryani,homedelivery takeaway indoorseating midrange,0.0,0.516802


In [29]:
# Top-5 matches scored as 1 / (1 + Manhattan distance) over TF-IDF vectors.
Evaluate_Model_Manhattan(description=description,Area=user_choice_Area)

Unnamed: 0,Name,Area,AverageCost,Cuisines,Testing Features,Total Ratings,similarity
2966,Food Junction,Baguihati,500,"chinese, north indian, biryani",homedelivery takeaway indoorseating expensive,3.7,0.369002
3118,New Arsalan Biryani House,Baguihati,200,biryani,homedelivery takeaway indoorseating midrange,0.0,0.35224
1642,The Floresta Restaurant,Baguihati,700,"chinese, north indian",homedelivery takeaway indoorseating expensive,3.7,0.320212
1606,Paramparaa,Baguihati,450,"north indian, chinese",homedelivery takeaway indoorseating expensive,4.0,0.320212
3185,Master Chef Pizza,Baguihati,500,"fast food, pizza",homedelivery takeaway indoorseating expensive,3.6,0.306408
