In [6]:
import random
import pickle
import nltk
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os

# Tokenizer model and lemmatizer corpus used by the text pre-processing cell.
nltk.download('punkt')
nltk.download('wordnet')
# Resources required by nltk.pos_tag / nltk.ne_chunk (used in extractName).
# Without them a fresh environment raises LookupError at the name prompt.
# NOTE(review): recent NLTK releases may expect renamed variants of these
# packages (e.g. '*_eng' / '*_tab') - confirm against the installed version.
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to /home/harshil/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/harshil/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [7]:
# Load the pickled knowledge base from disk into KB.
# NOTE: unpickling can execute arbitrary code - only load a file you trust.
with open('knowledgeBase.pickle', mode='rb') as kb_file:
    KB = pickle.load(kb_file)

In [8]:
# Load the persisted user model, or start with an empty one.
# NOTE: unpickling can execute arbitrary code - only load a file you trust.
file_path = 'userModel.pickle'
if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
    # A non-empty file exists: restore the saved model.
    with open(file_path, 'rb') as handle:
        userModel = pickle.load(handle)
else:
    # The file is missing, or is a zero-byte placeholder left behind by an
    # earlier run that never saved a user. Seed it with a valid pickle of the
    # empty model so that a later load cannot fail with EOFError.
    userModel = {}
    with open(file_path, 'wb') as handle:
        pickle.dump(userModel, handle)

In [9]:
# Defining Greeting function
greet_inputs = ('hello', 'hi', 'how are you?')
greet_responses = ('hi', 'Hey', 'Hey There!')


def _normalize_greeting(text):
    """Lower-case ``text`` and strip punctuation from the edges of every word.

    Returns the cleaned words joined by single spaces (empty words dropped).
    """
    stripped = (word.strip(string.punctuation) for word in text.lower().split())
    return ' '.join(word for word in stripped if word)


def greet(sentence):
    """Return a random greeting if ``sentence`` contains a known greeting.

    Matching is done on whole words after normalisation, so multi-word
    entries of ``greet_inputs`` (e.g. 'how are you?') and punctuated
    greetings (e.g. 'Hi!') are recognised, while 'hi' inside another word
    (e.g. 'this') is not.

    Args:
        sentence: Raw user input.

    Returns:
        One element of ``greet_responses``, or ``None`` when no greeting
        is found.
    """
    # Pad with spaces so a phrase only matches on word boundaries.
    padded_sentence = f' {_normalize_greeting(sentence)} '
    for phrase in greet_inputs:
        normalized_phrase = _normalize_greeting(phrase)
        if normalized_phrase and f' {normalized_phrase} ' in padded_sentence:
            return random.choice(greet_responses)
    return None

In [10]:
# Text PreProcessing
lemmer = nltk.stem.WordNetLemmatizer()
# Translation table for str.translate that deletes every character listed in
# string.punctuation.
remove_punc_dict = {ord(symbol): None for symbol in string.punctuation}


def LemTokens(tokens):
    """Return a list with every token lemmatized by the shared ``lemmer``."""
    return list(map(lemmer.lemmatize, tokens))


def LemNormalize(text):
    """Lower-case ``text``, remove punctuation, tokenize and lemmatize it."""
    cleaned = text.lower().translate(remove_punc_dict)
    return LemTokens(nltk.word_tokenize(cleaned))

In [11]:
# Defining response
def response(user_response):
    """Return the knowledge-base sentence most similar to ``user_response``.

    Candidate sentences are collected from ``KB`` for every lemmatized token
    of the query, then ranked by TF-IDF cosine similarity against the query.

    Args:
        user_response: Raw user input.

    Returns:
        The best-matching candidate sentence, or an apology message when
        there are no candidates or none shares any term with the query.
    """
    fallback = "I am sorry, unable to understand you."

    candidates = []
    for token in LemNormalize(user_response):
        if token in KB:
            candidates.extend(KB[token])
    if not candidates:
        # Nothing to rank. Returning here also avoids fitting the vectorizer
        # on a lone stop-word-only/empty query, which raises ValueError.
        return fallback

    TfidfVec = TfidfVectorizer(tokenizer = LemNormalize, stop_words = 'english')
    try:
        # The query goes last so it can be addressed as row -1.
        tfidf = TfidfVec.fit_transform(candidates + [user_response])
    except ValueError:
        # Raised when the vocabulary is empty (every document consists of
        # stop words only); treat it as "no usable match".
        return fallback

    # Compare the query with the candidates only, so the query can never be
    # selected as its own best match.
    scores = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    best = scores.argmax()
    if scores[best] <= 0:
        return fallback
    return candidates[best]

In [12]:
# Function to extract name
def extractName(user_response):
    """Extract a name from ``user_response`` using POS tags and NLTK NER.

    The text is tokenized, POS-tagged and chunked with ``nltk.ne_chunk``.
    PERSON / ORGANIZATION chunks and plain words tagged NN or JJ (except
    'i' and 'my') are collected in order and joined with single spaces.

    Args:
        user_response: Text in which to look for a name.

    Returns:
        The extracted name; the last token when nothing qualifies; or an
        empty string when the input contains no tokens.
    """
    words = nltk.word_tokenize(user_response)
    if not words:
        # Nothing to tag, and words[-1] below would raise IndexError.
        return ''

    named_entities = nltk.ne_chunk(nltk.pos_tag(words))

    name_parts = []
    for entity in named_entities:
        # Chunks are Tree nodes; everything else is a (word, tag) pair.
        # Check the Tree case first: a multi-token chunk also has len() > 1
        # and must not be treated as a (word, tag) pair.
        if isinstance(entity, nltk.tree.Tree):
            if entity.label() in ('PERSON', 'ORGANIZATION'):
                name_parts.append(' '.join(token[0] for token in entity.leaves()))
            continue

        word, tag = entity
        if word in ('i', 'my'):
            continue
        if tag in ('NN', 'JJ'):
            name_parts.append(word)

    if not name_parts:
        # Fall back to the last token of the input.
        return words[-1]
    return ' '.join(name_parts)

In [13]:
# Defining the Chat Flow
def FilmyBot():
    """Run an interactive FilmyBot chat session on stdin/stdout.

    Flow:
      1. Ask for the user's name and look it up in ``userModel``.
      2. For a new user, ask for likes/dislikes, store them in ``userModel``
         and persist the model to ``file_path``.
      3. Answer questions until the user types 'bye', 'thanks' or
         'thank you'.

    Returns:
        None.
    """
    print('Bot: Hello! I am FilmyBot, your guide on Bollywood. For ending the convo type bye!')
    print('Bot: For starters, what is your name?')

    user_response = input()
    name = extractName(user_response.lower())
    if name in userModel:
        print(f'Bot: Hi {name}. Welcome back! Happy to chat more about bollywood with you today!')
    else:
        print(f'Bot: Hi {name}. Happy to have you onboard this very Dhamakedar experience!')
        userModel[name] = {"likes": "", "dislikes": ""}
        print('Bot: What do you like about Bollywood?')
        user_response = input().lower()
        userModel[name]["likes"] = user_response
        print('Bot: Ok nice! Now what do you disike about Bollywood?')
        user_response = input().lower()
        userModel[name]["dislikes"] = user_response
        print('Bot: Fantastic! I am happy to learn more about you. Blast off any questions who have')
        # Persist immediately so the user is recognised in the next session.
        with open(file_path, 'wb') as handle:
            pickle.dump(userModel, handle)

    while True:
        # Strip so that stray whitespace does not hide a command like 'bye'.
        user_response = input().strip().lower()
        if not user_response:
            # Blank line: nothing to answer, wait for the next input.
            continue
        if user_response == 'bye':
            print('Bot: Goodbye!')
            break
        if user_response == 'thank you' or user_response == 'thanks':
            print('Bot: You are Welcome..')
            break

        # Call greet() once: it picks a random reply, so calling it twice
        # could check one value and print another.
        greeting = greet(user_response)
        if greeting is not None:
            print('Bot: ' + greeting)
        else:
            # Compute the answer before printing so the 'Bot: ' prefix and
            # the answer stay on one line even if the lookup emits warnings.
            answer = response(user_response)
            print('Bot: ' + answer)

In [10]:
# Start an interactive chat session; typing 'bye', 'thanks' or 'thank you' ends it.
FilmyBot()

Bot: Hello! I am FilmyBot, your guide on Bollywood. For ending the convo type bye!
Bot: For starters, what is your name?


 Raaed


Bot: Hi raaed. Happy to have you onboard this very Dhamakedar experience!
Bot: What do you like about Bollywood?


 I like 2000s coming of age movies


Bot: Ok nice! Now what do you disike about Bollywood?


 Salman Khan


Bot: Fantastic! I am happy to learn more about you. Blast off any questions who have


 Amitabh Bachchan's famous movies


Bot: 



I am sorry, unable to understand you.


 What are Salman Khan best movies? 


Bot:  the most popular actors in pakistan are the three khans of bollywood: salman, shah rukh, and aamir.


 aamir best movies recently


Bot: I am sorry, unable to understand you.


 aamir khan movies


Bot:  the most popular actors in pakistan are the three khans of bollywood: salman, shah rukh, and aamir.


 bye


Bot: Goodbye!


In [11]:
# Start another chat session; a name already stored in userModel gets the
# "Welcome back" greeting and skips the likes/dislikes questions.
FilmyBot()

Bot: Hello! I am FilmyBot, your guide on Bollywood. For ending the convo type bye!
Bot: For starters, what is your name?


 raaed


Bot: Hi raaed. Welcome back! Happy to chat more about bollywood with you today!


 thanks


Bot: You are Welcome..
