### **Importing Libraries and Corpus**

In [None]:
import numpy as np
import nltk
import sklearn
import string
import random

#### **For Response**

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#### Avoiding Warnings

In [None]:
import warnings
# Globally silence every Python warning raised from this point on.
# NOTE(review): this also hides deprecation notices from the libraries used below.
warnings.filterwarnings('ignore')

#### **Importing and Reading the Corpus**

In [None]:
# Read the whole corpus. The context manager closes the file handle as soon as
# the text has been read (the original left it open for the kernel's lifetime).
with open('INFORMATION.txt', 'r', errors = 'ignore') as File:
    RawDocument = File.read()

# Normalize the corpus to lowercase; customer input is lowercased as well
# before it is compared against the corpus.
RawDocument = RawDocument.lower()

# NLTK resources: 'punkt' backs the sentence/word tokenizers, 'wordnet' and
# 'omw-1.4' back the WordNet lemmatizer. 'averaged_perceptron_tagger' is
# downloaded too, although no code visible in this notebook uses a POS tagger.
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

### **Lexical Analysis**

#### **Converting the Corpus into a List of Sentences**

In [None]:
# Split the lowercased corpus into a list of sentence strings.
SentenceTokens = nltk.sent_tokenize(RawDocument)

# Demo: show the first two sentence tokens
SentenceTokens[:2]

['sony history\nin 1946, tokyo tsushin kogyo k.k.',
 '(tokyo telecommunications engineering corporation, the predecessor of sony group corporation) started as a small company with capital of just 190,000 yen and approximately 20 employees.']

#### **Converting the Corpus into a List of Words**

In [None]:
# Split the lowercased corpus into a flat list of word tokens.
WordTokens = nltk.word_tokenize(RawDocument)

# Demo: show the first two word tokens
WordTokens[:2]

['sony', 'history']

### **Semantic Analysis** 

### **Text Preprocessing** 

In [None]:
# Shared WordNet lemmatizer: groups similar word forms under one representative word
Lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(Tokens):
    """Lemmatize every token in ``Tokens`` and return the lemmas as a list, in order."""
    Lemmas = []
    for Token in Tokens:
        Lemmas.append(Lemmer.lemmatize(Token))
    return Lemmas

# Translation table mapping the code point of every ASCII punctuation character
# to None, so that str.translate() deletes those characters.
RemovePD = str.maketrans('', '', string.punctuation)

def LemNormalize(Text):
    """Lowercase ``Text``, strip punctuation, tokenize it and return the lemmatized tokens."""
    Cleaned = Text.lower().translate(RemovePD)
    Words = nltk.word_tokenize(Cleaned)
    return LemTokens(Words)

### **Greeting the Customer**

In [None]:
# Greetings the bot recognises (lowercase) and the replies it picks from.
CustomerInputs = ("hello", "hi", "greetings", "hola", "sup", "what's up", "hey",)
BotResponses = ["Hi", "Hey", "Hi There", "Hello", "Hey There"]

def Greet(Sentence):
    """Return a random greeting if ``Sentence`` contains a known greeting.

    Matching is case-insensitive and works on whole words, so "history" does
    not trigger on "hi". Punctuation around a word is ignored ("hi!" matches)
    and multi-word greetings such as "what's up" are recognised as phrases.

    Parameters
    ----------
    Sentence : str
        The customer's message.

    Returns
    -------
    str or None
        One entry of ``BotResponses`` when a greeting is found, otherwise None.
    """
    # Strip punctuation from the edges of each word only, so the apostrophe
    # inside "what's" survives while "hello," and "up?" are cleaned.
    Words = [Word.strip(string.punctuation) for Word in Sentence.lower().split()]
    # Pad with spaces so substring search below only matches on word boundaries.
    Padded = " " + " ".join(Word for Word in Words if Word) + " "

    for Greeting in CustomerInputs:
        if " " + Greeting + " " in Padded:
            return random.choice(BotResponses)
    return None

### **Response Generation**

In [None]:
def Response(CustomerResponse):
    """Return the corpus sentence most similar to the customer's query.

    The query and the corpus sentences are vectorized together with TF-IDF
    (tokenized by ``LemNormalize``, English stop words removed) and ranked by
    cosine similarity to the query.

    The chat loop appends the query to the global ``SentenceTokens`` before
    calling this function; that layout is detected and reused. When the query
    is not the last element, it is added to a local copy instead, so the
    function also works when called directly and never mutates the global list.

    Parameters
    ----------
    CustomerResponse : str
        The customer's (lowercased) query.

    Returns
    -------
    str
        The best-matching corpus sentence, or an apology when nothing in the
        corpus shares any term with the query.
    """
    Apology = "My apologies. Please Try Something Else"

    # Make sure the query is the last document without touching the global list.
    if SentenceTokens and SentenceTokens[-1] == CustomerResponse:
        Documents = SentenceTokens
    else:
        Documents = SentenceTokens + [CustomerResponse]

    # With no corpus sentence besides the query there is nothing to match against.
    if len(Documents) < 2:
        return Apology

    # Dealing with the most frequent words and storing it in Matrix form
    Vectorizer = TfidfVectorizer(tokenizer = LemNormalize, stop_words = 'english')
    TfidfFit = Vectorizer.fit_transform(Documents)

    # Compare the query (last row) with the corpus rows only. Excluding the
    # query itself avoids relying on it ranking first, which breaks on ties.
    Similarities = cosine_similarity(TfidfFit[-1], TfidfFit[:-1]).flatten()
    idx = Similarities.argmax()

    # A zero score means the query shares no weighted term with any sentence.
    if Similarities[idx] <= 0:
        return Apology
    return Documents[idx]

#### **Terminating Condition**

In [None]:
# Interactive chat loop: runs until the customer says 'bye' or thanks the bot.
flag = True
print("30T: Respond with 'Bye' to Exit Anytime.")
print("30T: Hello! Welcome to Sony Electronics. Hope you're having a Nice Day.")
print("30T: I am 301, How May I Help You?")

# Unique corpus words. Nothing in this loop reads it, so it is built once here
# instead of being rebuilt on every turn.
FinalWords = list(set(WordTokens))

while flag:
    print()
    # Normalize case and surrounding whitespace so 'Bye ' still matches 'bye'.
    CustomerResponse = input("You: ").lower().strip()

    #Terminating Condition
    if CustomerResponse == 'bye':
        flag = False
        print("30T: Goodbye! Visit Again.")
    elif CustomerResponse in ('thanks', 'thank you'):
        # Was `flag == False`, a comparison with no effect, so the chat never
        # ended after the customer said thanks.
        flag = False
        print("30T: You are Welcome!")
    else:
        # Call Greet once: it picks a random reply, so calling it twice would
        # test one reply and print another.
        Greeting = Greet(CustomerResponse)
        if Greeting is not None:
            print("30T: "+Greeting)
        else:
            # The query is temporarily added as the last corpus entry for Response.
            SentenceTokens.append(CustomerResponse)
            try:
                print("30T: ", end = "")
                print(Response(CustomerResponse))
            finally:
                # pop() removes exactly the entry appended above, even if
                # Response raised. remove() would delete the first equal
                # element, which could be a genuine corpus sentence.
                SentenceTokens.pop()

30T: Respond with 'Bye' to Exit Anytime.
30T: Hello! Welcome to Sony Electronics. Hope you're having a Nice Day.
30T: I am 301, How May I Help You?

You: hi
30T: Hey

You: GREETINGS
30T: Hi There

You: sup
30T: Hi There

You: Hello
30T: Hi There

You: thanks
30T: You are Welcome!

You: bye
30T: Goodbye! Visit Again.
