In [2]:
import nltk
import numpy as np
import random 
import string

import bs4 as bs
import requests 
import re

import warnings
warnings.filterwarnings = False

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [25]:
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\shiva\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\shiva\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\shiva\AppData\Roaming\nltk_data...


True

In [26]:
r = requests.get('https://en.wikipedia.org/wiki/Cuisine')
raw_html = r.text

In [27]:
r

<Response [200]>

In [28]:
corpus_html = bs.BeautifulSoup(raw_html)

corpus_paras = corpus_html.find_all('p')
corpus_text = ''

for para in corpus_paras:
    corpus_text += para.text
    
corpus_text = corpus_text.lower()

In [29]:
corpus_text

'\na cuisine  is a style of cooking characterized by distinctive ingredients, techniques and dishes, and usually associated with a specific culture or geographic region. regional food preparation techniques, customs, and ingredients  combine to enable dishes unique to a region.[1]\na cuisine is partly determined by ingredients that are available locally or through trade. regional ingredients are developed and commonly contribute to a regional or national cuisine, such as japanese rice in japanese cuisine or new mexico chile in new mexican cuisine. likewise, national dishes have variations, such as gyros in greek cuisine and hamburger in american cuisine.\nreligious food laws can also exercise an influence on cuisine, such as hinduism in indian cuisine, sikhism in punjabi cuisine, buddhism in east asian cuisine, christianity in european cuisine,[2][3][4] islam in middle eastern cuisine, and judaism in jewish and israeli cuisine.\ncuisine is borrowed from the french meaning cooking, culi

In [30]:
corpus_text = re.sub(r'\[[0-9]*\]',' ',corpus_text)
corpus_text = re.sub(r'\s+',' ',corpus_text)

In [31]:
corpus_sentences = nltk.tokenize.sent_tokenize(corpus_text)
corpus_words = nltk.tokenize.word_tokenize(corpus_text) 

In [32]:
corpus_sentences

[' a cuisine is a style of cooking characterized by distinctive ingredients, techniques and dishes, and usually associated with a specific culture or geographic region.',
 'regional food preparation techniques, customs, and ingredients combine to enable dishes unique to a region.',
 'a cuisine is partly determined by ingredients that are available locally or through trade.',
 'regional ingredients are developed and commonly contribute to a regional or national cuisine, such as japanese rice in japanese cuisine or new mexico chile in new mexican cuisine.',
 'likewise, national dishes have variations, such as gyros in greek cuisine and hamburger in american cuisine.',
 'religious food laws can also exercise an influence on cuisine, such as hinduism in indian cuisine, sikhism in punjabi cuisine, buddhism in east asian cuisine, christianity in european cuisine, islam in middle eastern cuisine, and judaism in jewish and israeli cuisine.',
 'cuisine is borrowed from the french meaning cookin

In [33]:
greeting_inputs = ("hey","good morning","good evening","morning","evening","hi","whatsup",)
greeting_responses = ["hey","hey hows you?","*nods*","hello, how you doing","hello","Welcome I am good",]

def greet_response(greeting):
    for token in greeting.split():
        if token.lower() in greeting_inputs:
            return random.choice(greeting_responses)

In [34]:
wn_lemmatizer = nltk.stem.WordNetLemmatizer()

def lemmatize_corpus(tokens):
    return [wn_lemmatizer.lemmatize(token) for token in tokens]

punct_removal_dict = dict((ord(punctuation),None) for punctuation in string.punctuation)

def get_processed_text(document):
    return lemmatize_corpus(nltk.tokenize.word_tokenize(document.lower().translate(punct_removal_dict)))

In [37]:
def respond(user_input):
    global corpus_sentences
    bot_response = ''
    corpus_sentences.append(user_input)
    
    word_vectorizer = TfidfVectorizer(tokenizer=get_processed_text, stop_words='english')
    corpus_word_vectors = word_vectorizer.fit_transform(corpus_sentences)
    
    cos_sim_vectors = cosine_similarity(corpus_word_vectors[-1], corpus_word_vectors)
    similar_response_idx = cos_sim_vectors.argsort()[0][-2]
    
    matched_vector = cos_sim_vectors.flatten()
    matched_vector.sort()
    vector_matched = matched_vector[-2]
    
    if vector_matched == 0:
        bot_response = bot_response + "I am sorry, what is it, again?"
        return bot_response
    else:
        bot_response = bot_response + corpus_sentences[similar_response_idx]
        return bot_response

In [None]:
chat = True
print("Hello, What do you want to learn about Cuisines today?")
while (chat == True) :
    user_query=input()
    user_query = user_query.lower()
    if user_query != 'quit':
        if user_query == 'thanks' or user_query == 'thank you':
            chat = False
            print("CuisineBot: You are welcome!")
        else:
            if greet_response (user_query) != None:
                print("CuisineBot:" + greet_response (user_query))
            else:
                print("CuisineBot: ", end="")
                print (respond (user_query))
                corpus_sentences.remove(user_query)
    else:
        chat = False
        print("CuisineBot: Good bye!")

Hello, What do you want to learn about Cuisines today?
Hi
CuisineBot:hey
cuisine
CuisineBot: cuisine
pizza
CuisineBot: I am sorry, what is it, again?
beer
CuisineBot: buffalo wings with blue cheese dressing, served with lager beer.
