In [None]:
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
import nltk
import re
from nltk.stem.lancaster import LancasterStemmer
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential,load_model
from keras.layers import Dense,GRU, LSTM, Bidirectional, Embedding, Dropout, Flatten
from keras.callbacks import ModelCheckpoint

In [None]:
  def load_dataset(filename):
      """Load the labelled utterances used to train the intent classifier.

      The CSV is read without a header row; its two columns are named
      ``Sentence`` and ``Intent``.

      Args:
          filename: Path (or buffer) handed straight to ``pandas.read_csv``.

      Returns:
          A ``(intent, unique_intent, sentences)`` tuple: the ``Intent``
          column as a Series, the distinct intent labels as a list in order
          of first appearance, and the ``Sentence`` column as a list.
      """
      df = pd.read_csv(filename, encoding="utf-8", names=["Sentence", "Intent"])
      intent = df["Intent"]
      # dict.fromkeys de-duplicates while keeping first-appearance order.
      # A plain set() yields an order that changes between interpreter runs,
      # and this list is later used to map model output columns to labels.
      unique_intent = list(dict.fromkeys(intent))
      sentences = list(df["Sentence"])
      return (intent, unique_intent, sentences)


In [None]:
  # Read the training corpus; these three names are used by the cells below.
  intent,unique_intent,sentences = load_dataset('/content/dataCorp.csv')

  # Peek at the first eleven sentences only.
  print(sentences[:11])

In [None]:
#  stemmer= LancasterStemmer()

In [None]:
 # Header-less CSV read into a single column named "stopwords".
 df= pd.read_csv("/content/stopwords.csv",encoding = "utf-8", names = ["stopwords"])


In [None]:
# De-duplicated stopword list consumed by cleaning().
# NOTE: this rebinds `stopwords`, shadowing the `nltk.corpus.stopwords`
# object imported in the first cell.
words =df["stopwords"]
stopwords = list(set(words))
print(stopwords)


In [None]:
  def cleaning(sentences):
      """Tokenise sentences, lower-case the tokens and drop stopwords.

      Every character other than an ASCII letter, digit or space is replaced
      by a space before splitting on whitespace. Tokens are lower-cased
      *before* the stopword check so that capitalised stopwords ("The") are
      removed as well.

      Args:
          sentences: Iterable of strings.

      Returns:
          A list with one list of lower-case tokens per input sentence.
      """
      # Set membership is O(1); lower-casing makes the comparison
      # case-insensitive on both sides.
      stop_set = {str(word).lower() for word in stopwords}
      words = []
      for s in sentences:
          clean = re.sub(r'[^ a-z A-Z 0-9]', " ", s)
          tokens = [token.lower() for token in clean.split()]
          words.append([token for token in tokens if token not in stop_set])
      return words

In [None]:
  # Tokenised, stopword-free version of every training sentence.
  cleaned_words = cleaning(sentences)
  print(len(cleaned_words))
  # The remaining prints dump the full token lists, label set and label column.
  print(cleaned_words)
  print(unique_intent)
  print(intent)


In [None]:
  def ready(sentences):
      """Build prefix (n-gram) token sequences and a label for each of them.

      A fresh ``Tokenizer`` is fitted on ``sentences``. Each sentence is then
      expanded into all of its leading sub-sequences (first token, first two
      tokens, ...), and the sentence's intent is recorded once per
      sub-sequence so that ``y`` lines up with ``input_sequences``.

      Reads the module-level ``intent`` labels, pairing them with
      ``sentences`` by position.

      Args:
          sentences: List of raw sentence strings.

      Returns:
          ``(input_sequences, y, total_words)``: the list of prefix
          sequences, the parallel list of intent labels, and the tokenizer
          vocabulary size plus one.
      """
      tokenizer = Tokenizer()
      tokenizer.fit_on_texts(sentences)
      total_words = len(tokenizer.word_index) + 1

      input_sequences = []
      y = []  # one intent label per entry of input_sequences
      for sentence, label in zip(sentences, intent):
          token_list = tokenizer.texts_to_sequences([sentence])[0]
          for end in range(1, len(token_list) + 1):
              input_sequences.append(token_list[:end])
              # Previously the label loop ran after the sentence loop and
              # only ever appended the last sentence's intent.
              y.append(label)
      return input_sequences, y, total_words


In [None]:
   # Prefix sequences, their labels and the vocabulary size from ready().
   input_sequences,y,total_words = ready(sentences)
   print(input_sequences)    # n-gram result
   #print(y)
   #print(total_words)

In [None]:
  # Pad every prefix sequence on the right up to the longest one.
  max_sequence_len = max([len(x) for x in input_sequences])
  # Rebinds input_sequences from a list of lists to the padded array.
  input_sequences = np.array(pad_sequences(input_sequences,maxlen=max_sequence_len,padding='post'))
  print(input_sequences)
  

In [None]:
  def create_tokenizer(words,filters=r'!"#$%&()*+,-./:;<=>?@[\]^_`{|}~'):
      """Create a Keras ``Tokenizer`` and fit it on ``words``.

      Args:
          words: Texts (or token lists) passed to ``fit_on_texts``.
          filters: Characters the tokenizer strips from the texts. The
              default is written as a raw string so the backslash is taken
              literally, and contains the backtick that the earlier
              ``^_^`` spelling had dropped.

      Returns:
          The fitted ``Tokenizer``.
      """
      token = Tokenizer(filters=filters)
      token.fit_on_texts(words)
      return token
        

In [None]:
  def max_length ( words ) :
      """Return the length of the longest item in ``words``.

      Args:
          words: Iterable of sized items (e.g. token lists).

      Returns:
          The largest ``len(item)``, or 0 when ``words`` is empty (a bare
          ``max()`` would raise ``ValueError`` there).
      """
      return max((len(item) for item in words), default=0)
    

In [None]:
  # Fit the input tokenizer on the cleaned token lists.
  word_tokenizer = create_tokenizer ( cleaned_words )
  vocab_size = len(word_tokenizer.word_index)+1
  # The longest-sentence length is computed inline instead of calling the
  # max_length() helper. The result is stored under that same name, so a
  # helper call here would fail with "'int' object is not callable" the
  # second time this cell is run.
  max_length = max((len(tokens) for tokens in cleaned_words), default=0)
  print("Vocab Size = %d and Maximum length = %d"%(vocab_size,max_length))

Vocab Size = 185 and Maximum length = 8


In [None]:
  # Display the word -> integer id mapping learned by the input tokenizer.
  word_tokenizer.word_index
  

In [None]:
  def encoding_doc(token,words):
      """Encode ``words`` as integer sequences using a fitted tokenizer.

      Args:
          token: Tokenizer object exposing ``texts_to_sequences``.
          words: The texts (or token lists) to encode.

      Returns:
          Whatever ``token.texts_to_sequences(words)`` returns.
      """
      sequences = token.texts_to_sequences(words)
      return sequences
    

In [None]:
# Integer-encode every cleaned training sentence.
encoded_doc = encoding_doc(word_tokenizer,cleaned_words)
  

In [None]:
  def padding_doc(encoded_doc,max_length):
      """Right-pad encoded sequences to a common length.

      Args:
          encoded_doc: Integer sequences to pad.
          max_length: Target length forwarded as ``maxlen``.

      Returns:
          The result of ``pad_sequences`` with ``padding="post"``.
      """
      padded = pad_sequences(encoded_doc, padding = "post", maxlen = max_length)
      return padded
    

In [None]:
  # Model input: encoded sentences padded to max_length.
  padded_doc = padding_doc(encoded_doc,max_length)

In [None]:
  # Spot-check a single padded row.
  print(padded_doc[7])


[ 2  6  4 22  0  0  0  0]


In [None]:
  # Print the shape, as the label says, rather than dumping the whole array.
  print("shape of padded docs=",padded_doc.shape)


In [None]:
  #tokenizer with filter changed
  # Label tokenizer: the filter omits "_" so underscores in intent names are
  # kept. Written as a raw string so the backslash is not read as an
  # (invalid) escape sequence.
  output_tokenizer = create_tokenizer(unique_intent, filters=r'!"#$%&()*+,-./:;<=>?@[\]^`{|}~')


In [None]:
  # Display the intent label -> integer id mapping.
  output_tokenizer.word_index
  

{'order': 1, 'menu': 2, 'price': 3, 'end': 4, 'greet': 5}

In [None]:
  # Encode every row's intent label with the label tokenizer.
  encoded_output = encoding_doc(output_tokenizer,intent)
  print(encoded_output)

In [None]:
  # Reshape to a column (one row per sample) before one-hot encoding.
  # Rebinds encoded_output, so this cell is not meant to be re-run on its own.
  encoded_output = np.array(encoded_output).reshape(len(encoded_output),1)
  # print(encoded_output)

In [None]:
  def one_hot(encode):
      """One-hot encode a column of integer class ids as a dense array.

      Args:
          encode: 2-D array-like passed to ``OneHotEncoder.fit_transform``.

      Returns:
          The encoder's dense output.
      """
      try:
          # scikit-learn >= 1.2 spells the option ``sparse_output``; the old
          # ``sparse`` keyword was removed in 1.4.
          encoder = OneHotEncoder(sparse_output=False)
      except TypeError:
          # Older scikit-learn releases only know ``sparse``.
          encoder = OneHotEncoder(sparse=False)
      return encoder.fit_transform(encode)

In [None]:
  # Training targets: one-hot rows built from the encoded intent ids.
  output_one_hot = one_hot(encoded_output)


In [None]:
  # Report the target matrix shape.
  print(output_one_hot.shape)

(163, 5)


In [None]:
  from sklearn.model_selection import train_test_split


In [None]:
  # 70/30 shuffled split. random_state is fixed so that "Restart & Run All"
  # reproduces the same train/validation partition (and comparable metrics).
  train_X, val_X,train_Y,val_Y = train_test_split(padded_doc, output_one_hot ,shuffle=True,test_size=0.3,random_state=42)

In [None]:
  # Report the shapes of both partitions.
  print(f"shape of train_X= {train_X.shape} and train_Y = {train_Y.shape}")
  print(f"Shape of val_X= {val_X.shape} and val_Y = {val_Y.shape}")

In [None]:
from keras.layers.pooling import GlobalAveragePooling1D
def create_model(vocab_size,max_length,num_classes=5):
      """Build the (uncompiled) intent classification network.

      Layers, in order: Embedding(vocab_size, 256) -> LSTM(64) -> Flatten ->
      Dense(16, relu) -> Dense(num_classes, softmax).

      Args:
          vocab_size: Size of the embedding vocabulary.
          max_length: Length of the padded input sequences.
          num_classes: Width of the softmax output layer. Defaults to 5,
              the value that used to be hard-coded.

      Returns:
          The assembled ``Sequential`` model.
      """
      model = Sequential()
      model.add(Embedding(vocab_size,256,input_length = max_length))
      model.add(LSTM(64))
      # NOTE(review): Flatten after an LSTM that returns only its last state
      # looks redundant - confirm before removing.
      model.add(Flatten())
      model.add(Dense(16,activation="relu"))
      model.add(Dense(num_classes, activation = "softmax"))
      return model
      

In [None]:
  # Instantiate the network for the fitted vocabulary and padded length.
  model = create_model(vocab_size,max_length)

  # Categorical cross-entropy matches the one-hot targets and softmax output.
  model.compile(loss = "categorical_crossentropy",optimizer = "adam",metrics = ["accuracy"])
  model.summary()


In [None]:
  # File that receives the weights with the lowest validation loss.
  filename = 'model.h5'
  checkpoint = ModelCheckpoint(
      filename,
      mode='min',
      monitor='val_loss',
      save_best_only=True,
      verbose=1,
  )
  # Stop as soon as validation loss fails to improve for one epoch and roll
  # back to the best weights seen.
  early_stopping = tf.keras.callbacks.EarlyStopping(
      monitor='val_loss', min_delta=0, patience=1, verbose=2,
      mode='auto', baseline=None, restore_best_weights=True,
  )
  hist = model.fit(
      train_X, train_Y,
      validation_data=(val_X, val_Y),
      epochs=50, batch_size=16, verbose=1,
      callbacks=[checkpoint, early_stopping],
  )


In [None]:
  # Replace the in-memory model with the checkpoint saved during training.
  model= load_model("model.h5")

In [None]:
  def predictions(text):
      """Run the intent model on a single piece of text.

      The text goes through the same pipeline as the training data:
      ``cleaning`` (punctuation removal, lower-casing, stopword filtering),
      encoding with ``word_tokenizer`` and right-padding to ``max_length``.
      Previously the text was split into words that were encoded one by one
      and reshaped by hand, without stopword removal, so inference inputs
      did not match what the model was trained on.

      Args:
          text: The (already translated) user utterance.

      Returns:
          The array returned by ``model.predict`` for the single padded row.
      """
      tokens = cleaning([text])  # one token list for the one sentence
      encoded = word_tokenizer.texts_to_sequences(tokens)
      x = padding_doc(encoded, max_length)
      return model.predict(x)
    

In [None]:
    def get_final_output(pred, classes):
        """Pick the class label with the highest score in the first prediction row.

        Prints the raw score vector, ranks the labels by descending score
        and returns the top one.

        Args:
            pred: Prediction array; only ``pred[0]`` is used.
            classes: Labels, positionally aligned with the score vector.

        Returns:
            The element of ``classes`` at the best-scoring position.
        """
        scores = pred[0]
        print(scores)
        ranking = np.argsort(-scores)  # indices from highest to lowest score
        ranked_labels = np.array(classes)[ranking]
        return ranked_labels[0]


In [None]:
    pip install googletrans==3.1.0a0


In [None]:
  from googletrans import Translator
  # Shared translator instance; the chatbot cells below use it for every
  # prompt and reply.
  translator= Translator()
  # Smoke test: translate a Hindi sentence (no explicit destination language),
  # classify the result and display the predicted intent.
  text= translator.translate('मुझे बिरयानी ऑर्डर करनी है').text
  pred = predictions(text)
  get_final_output(pred, unique_intent)
      

In [None]:
# Menu "database": one record per dish with its price for each portion size
# and a veg / non-veg tag.
DB = [
    {'name': 'simple veg', 'quarter': 125, 'half': 205, 'full': 265, 'type': 'veg'},
    {'name': 'chicken 65', 'quarter': 125, 'half': 205, 'full': 260, 'type': 'non-veg'},
    {'name': 'fish fry', 'quarter': 160, 'half': 240, 'full': 310, 'type': 'non-veg'},
    {'name': 'prawns fry', 'quarter': 140, 'half': 220, 'full': 290, 'type': 'non-veg'},
    {'name': 'paneer', 'quarter': 210, 'half': 270, 'full': 340, 'type': 'veg'},
    {'name': 'kaju', 'quarter': 210, 'half': 270, 'full': 340, 'type': 'veg'},
    {'name': 'egg', 'quarter': 170, 'half': 240, 'full': 310, 'type': 'veg'},
    {'name': 'mushroom', 'quarter': 180, 'half': 250, 'full': 320, 'type': 'veg'},
    {'name': 'chicken', 'quarter': 170, 'half': 250, 'full': 310, 'type': 'non-veg'},
]

In [None]:
    pip install word2number

In [None]:
  # Language code passed as `dest` when the bot's messages are translated.
  lang = 'en' #language of the chatbot

In [None]:
# Orders accumulated during the conversation (appended to by response()).
order_list=[]
# Portion sizes; these are also the price keys of each DB record.
sizes = ['quarter','half','full']
# Dish names offered, taken from the menu records.
briyani = [obj['name'] for obj in DB]
     # w2n converts number words ("two") to numbers.
from word2number import w2n
def get_overall_info(sentence):
    """Extract quantity, size and dish from an order sentence and price it.

    Each entity is searched for independently as a whole word or phrase,
    case-insensitively. The candidate sizes and dish names come from the
    module-level ``sizes`` and ``briyani`` lists, so they always agree with
    the menu. Anything not found is left as ``None`` and filled in
    interactively by ``ask``.

    Args:
        sentence: The user's (translated) utterance.

    Returns:
        ``(overall_info_object, entity_dict)`` where ``overall_info_object``
        has the string fields ``quantity`` and ``price`` (unit price for the
        chosen size) plus ``pobj`` (the menu record), and ``entity_dict``
        holds the resolved ``quantity``, ``size`` and ``name``.

    Raises:
        ValueError: If the resolved dish name has no record in ``DB``.
    """
    text = sentence.lower()

    def first_match(alternatives):
        """Return the first whole-word occurrence of any alternative, else None."""
        if not alternatives:
            return None
        # Longest first so 'chicken 65' is preferred over plain 'chicken'.
        ordered = sorted(alternatives, key=len, reverse=True)
        pattern = r'\b(?:' + '|'.join(re.escape(alt) for alt in ordered) + r')\b'
        found = re.search(pattern, text)
        return found.group(0) if found else None

    quantity_tokens = ['a', 'an', 'one', 'two', 'three', 'four', 'five',
                       'six', 'seven', 'eight', 'nine']
    quantity_tokens += [str(digit) for digit in range(1, 10)]

    # Key order matters: ask() prompts for missing entities in this order.
    entity_dict = {
        'quantity': first_match(quantity_tokens),
        'size': first_match(sizes),
        'name': first_match(briyani),
    }

    quantity = entity_dict['quantity']
    if quantity in ('a', 'an'):
        entity_dict['quantity'] = '1'
    elif quantity is not None:
        entity_dict['quantity'] = str(w2n.word_to_num(quantity))

    entity_dict = ask(entity_dict)

    briyani_object = next(
        (obj for obj in DB if obj['name'] == entity_dict['name']), None)
    if briyani_object is None:
        raise ValueError('Unknown menu item: %r' % (entity_dict['name'],))

    overall_info_object = {
        'quantity': str(entity_dict['quantity']),
        'price': str(briyani_object[entity_dict['size']]),
        'pobj': briyani_object,
    }
    return overall_info_object, entity_dict

def _ask_user(question):
    """Prompt with ``question`` (translated to ``lang``) and return the reply
    passed back through the translator, lower-cased and stripped."""
    reply = input(translator.translate(question, dest=lang).text)
    return translator.translate(reply).text.lower().strip()


def _parse_quantity(text):
    """Return ``text`` as an int (digits or number words), or None if it is not a number."""
    try:
        return int(text)
    except ValueError:
        pass
    try:
        return int(w2n.word_to_num(text))
    except ValueError:
        return None


def ask(entity_dict):
    """Interactively fill in every entity that is still ``None``.

    Walks the keys of ``entity_dict`` in order and, for ``quantity``,
    ``size`` and ``name``, keeps prompting until the user gives a valid
    answer: a quantity from 1 to 9, a size listed in ``sizes``, a dish
    listed in ``briyani``. Non-numeric quantity replies are rejected and
    re-prompted instead of raising ``ValueError``.

    Args:
        entity_dict: Dict of extracted entities; updated in place.

    Returns:
        The same dict with the handled entities filled in (all strings).
    """
    for key in list(entity_dict.keys()):
        if entity_dict[key] is not None:
            continue

        if key == 'quantity':
            while True:
                answer = _ask_user("How many  do you want?")
                quantity = _parse_quantity(answer)
                if quantity is None or not 1 <= quantity <= 9:
                    # A space was missing between the message and the value.
                    print(translator.translate('Sorry you cannot order ' + answer, dest=lang).text)
                else:
                    entity_dict[key] = str(quantity)
                    break

        elif key == 'size':
            while True:
                answer = _ask_user("We have quarter, half and full , choose one")
                if answer in sizes:
                    entity_dict[key] = answer
                    break
                print(translator.translate('Please choose the correct size', dest=lang).text)

        elif key == 'name':
            while True:
                answer = _ask_user('what Briyani would you like to have')
                if answer in briyani:
                    entity_dict[key] = answer
                    break
                print(translator.translate('Please choose the correct Order', dest=lang).text)

    return entity_dict

In [None]:
def response(sentence, intent):
    """Produce the bot's English reply for a classified user sentence.

    Every branch returns a string so the caller can translate and print it.
    The menu branch used to print its lines itself and return ``None``, and
    unknown intents also returned ``None``.

    Args:
        sentence: The user's (translated) utterance; used for the ``order``
            and ``price`` intents.
        intent: One of ``greet``, ``menu``, ``order``, ``price``, ``end``.

    Returns:
        The reply text. ``order`` also appends the order to ``order_list``;
        ``end`` totals ``order_list`` into the bill.
    """
    if intent == 'greet':
        return 'Hi! How may I help you?'

    if intent == 'menu':
        # One line per dish; separators added so name and price do not run together.
        lines = [str(obj['name']) + ' price: ' + str(obj['full']) + ' rs' for obj in DB]
        return '\n'.join(lines)

    if intent == 'order':
        overall_info_object, entity_dict = get_overall_info(sentence)
        order_list.append(overall_info_object)
        return ('Thank You for your order of ' + str(order_list[-1]['quantity'])
                + ' ' + str(entity_dict['name'] + ' ' + 'briyani'))

    if intent == 'price':
        overall_info_object, entity_dict = get_overall_info(sentence)
        price = int(overall_info_object['quantity']) * int(overall_info_object['price'])
        return 'The price is Rs.' + str(price)

    if intent == 'end':
        bill = sum(int(item['quantity']) * int(item['price']) for item in order_list)
        if bill:
            return ' Thank You for Orderingin  Rezzo restaurent ,Your total bill amount is Rs.' + str(bill)
        return 'see you later'

    # Fallback for an intent this function does not handle.
    return 'Sorry, I did not understand that.'
                        

In [None]:

# Chat loop: read a line, translate it, classify it, answer, and stop once
# the conversation reaches the 'end' intent.
while True:
    raw_input = input()
    translated_input = translator.translate(raw_input).text.lower().strip()
    # Stored as `predicted_intent`, not `intent`, so the training-label column
    # loaded at the top of the notebook is not overwritten by the chat.
    if translated_input == 'bill please':
        predicted_intent = 'end'
    else:
        pred = predictions(translated_input)
        predicted_intent = get_final_output(pred, unique_intent)
    print(predicted_intent)
    resp = response(translated_input, predicted_intent)
    if resp:
        # Guard: there may be nothing to say for some intents.
        print(translator.translate(resp, dest=lang).text)
    if predicted_intent == 'end':
        break