# Arabic model

### Import libraries 

In [None]:
# !pip install pyarabic
import numpy as np
import pickle
from keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import pyarabic.araby as araby
import nltk
import pandas as pd
import string
import re
import warnings
warnings.filterwarnings('ignore')
import joblib
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

### Language detection model

In [None]:
# Load the language-identification corpus (the code below reads its `Text` and `Language` columns).
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines='warn'` is the documented replacement for the old
# error_bad_lines=False / warn_bad_lines=True combination: malformed rows are
# skipped and reported instead of aborting the load.
df = pd.read_csv('Language_detection.csv', on_bad_lines='warn')
def removeSymbolsAndNumbers(text):
    """Strip punctuation and digits from ``text`` and lower-case the result.

    Parameters
    ----------
    text : str
        Raw input sentence.

    Returns
    -------
    str
        ``text`` without any character of ``string.punctuation`` and without
        digit runs, lower-cased. Whitespace is left untouched, so removing a
        token can leave consecutive spaces behind.
    """
    # re.escape is required here: string.punctuation contains `\`, `]`, `^`
    # and `-`, which are special inside a character class. Without escaping,
    # `\]` is parsed as an escaped bracket and backslashes are never removed.
    punctuation_class = '[{}]'.format(re.escape(string.punctuation))
    text = re.sub(punctuation_class, '', text)
    text = re.sub(r'\d+', '', text)
    # '@' is already part of string.punctuation, so no separate pass is needed.
    return text.lower()

def removeEnglishLetters(text):
    """Delete every run of ASCII letters (a-z, A-Z) from ``text``.

    The remaining text is returned lower-cased; all other characters
    (spaces, digits, non-Latin scripts, accented letters) are kept.
    """
    latin_run = re.compile(r'[a-zA-Z]+')
    stripped = latin_run.sub('', text)
    return stripped.lower()


# Rows labelled Arabic get their Latin letters stripped; every other row
# keeps its text unchanged.
X0 = df.apply(
    lambda row: removeEnglishLetters(row.Text) if row.Language == 'Arabic' else row.Text,
    axis=1,
)
# Punctuation/digit removal and lower-casing are applied to every sample.
X1 = X0.apply(removeSymbolsAndNumbers)
y = df['Language']
x_train, x_test, y_train, y_test = train_test_split(X1, y, random_state=42)

# Character 1- to 3-gram TF-IDF features feeding a logistic-regression classifier.
vectorizer = TfidfVectorizer(analyzer='char', ngram_range=(1, 3))
pipe = pipeline.Pipeline(steps=[
    ('vectorizer', vectorizer),
    ('clf', LogisticRegression()),
])

pipe.fit(x_train, y_train)

Pipeline(steps=[('vectorizer',
                 TfidfVectorizer(analyzer='char', ngram_range=(1, 3))),
                ('clf', LogisticRegression())])

### Load data and models

In [None]:
# Question/answer corpora for both chatbots; the tokenizers further down are
# fitted on them.
# NOTE: pickle.load can execute arbitrary code - only open files you trust.
with open('questions_arabic.pkl', 'rb') as fh:
    questions_arabic = pickle.load(fh)

with open('answers_arabic.pkl', 'rb') as fh:
    answers_arabic = pickle.load(fh)

with open('questions_English.pkl', 'rb') as fh:
    questions = pickle.load(fh)

with open('answers_English.pkl', 'rb') as fh:
    answers = pickle.load(fh)

In [None]:
# Encoder and decoder models for the Arabic chatbot.
enc_model = load_model('Arabic_encoder_model.h5')
dec_model = load_model('Arabic_decoder_model.h5')

# Encoder and decoder models for the English chatbot.
# NOTE(review): the space before '.h5' is part of these file names as written;
# presumably it matches the files on disk - rename both together if cleaning up.
enc_model_english = load_model('encoder_model_english3 .h5')
dec_model_english = load_model('decoder_model_english3 .h5')



In [None]:
# Arabic vocabulary: fitted on the questions and the answers together.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(questions_arabic + answers_arabic)

# Number of distinct words plus one (Keras reserves index 0, so word indices start at 1).
VOCAB_SIZE = len(tokenizer.word_index) + 1
print(f'VOCAB SIZE : {VOCAB_SIZE}')

In [None]:
def normalizeArabic(text):
    """Normalize an Arabic sentence before tokenization.

    Steps, applied in this order (the order matters, e.g. alef variants are
    unified before the "او" removal):

    1. trim leading/trailing whitespace;
    2. map alef variants to bare alef, alef maqsura to ya, hamza-on-waw and
       hamza-on-ya to a standalone hamza, ta marbuta to ha;
    3. delete every occurrence of the two-letter sequence "او";
    4. delete shadda, the short vowels, tanwin, sukun and tatweel;
    5. collapse each whitespace run to a single space;
    6. pass the result through ``araby.strip_tashkeel``.

    Parameters
    ----------
    text : str
        Raw sentence.

    Returns
    -------
    str
        The normalized sentence.
    """
    text = text.strip()
    text = re.sub("[إأٱآا]", "ا", text)
    text = re.sub("ى", "ي", text)
    text = re.sub("ؤ", "ء", text)
    text = re.sub("ئ", "ء", text)
    text = re.sub("ة", "ه", text)
    # NOTE(review): this removes "او" anywhere, including inside words. It is
    # kept as-is because the loaded models were presumably trained on text
    # normalized the same way - confirm before changing.
    text = re.sub("او", "", text)
    # Diacritics and elongation, written as code points because literal
    # combining marks are easy to corrupt in an editor:
    #   U+0651 shadda, U+064E fatha, U+064B fathatan, U+064F damma,
    #   U+064C dammatan, U+0650 kasra, U+064D kasratan, U+0652 sukun,
    #   U+0640 tatweel (kashida).
    text = re.sub("[\u0651\u064E\u064B\u064F\u064C\u0650\u064D\u0652\u0640]", "", text)
    # Collapse whitespace runs to one space. Raw string: a plain '\s' is an
    # invalid escape sequence (SyntaxWarning on Python 3.12+).
    text = re.sub(r"\s+", " ", text)
    return araby.strip_tashkeel(text)

In [None]:
# Sequence-length limits for the Arabic chatbot.
# NOTE(review): presumably these equal the lengths used when the models were
# trained - confirm before changing.
maxlen_answers = 10
maxlen_questions = 18


def str_to_tokens(sentence: str):
    """Convert an Arabic question into the padded index matrix fed to the encoder.

    The sentence is normalized with ``normalizeArabic`` and encoded with the
    fitted ``tokenizer``. ``texts_to_sequences`` is used instead of indexing
    ``tokenizer.word_index`` directly, because a direct lookup raised
    ``KeyError`` for any word outside the training vocabulary; unknown words
    are now simply skipped.

    Parameters
    ----------
    sentence : str
        Raw user question.

    Returns
    -------
    The result of ``pad_sequences`` for the single encoded sentence,
    post-padded to ``maxlen_questions``.
    """
    normalized = normalizeArabic(sentence)
    sequences = tokenizer.texts_to_sequences([normalized])
    return pad_sequences(sequences, maxlen=maxlen_questions, padding='post')

In [None]:
# Generate the Arabic chatbot reply for an already-tokenized question.
def decode_response(text):
    """Greedy-decode an Arabic reply from a tokenized question.

    ``enc_model`` turns the question into the initial decoder states. The
    decoder is then fed one token at a time, starting with the ``'start'``
    token, and the arg-max word of each step becomes the next input.

    Decoding stops at the ``'end'`` token or after ``maxlen_answers + 1``
    decoder steps. The step cap guarantees termination: previously the loop
    only counted emitted words, so it could spin forever when the decoder kept
    predicting an index that has no word (e.g. the padding index 0).

    Parameters
    ----------
    text
        Padded token matrix as produced by ``str_to_tokens``.

    Returns
    -------
    str
        ``"Bot:"`` followed by every generated word, each preceded by a space.
    """
    states_values = enc_model.predict(text)
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer.word_index['start']

    decoded_words = []
    for _ in range(maxlen_answers + 1):
        dec_outputs, h, c = dec_model.predict([target_seq] + states_values)
        sampled_word_index = int(np.argmax(dec_outputs[0, -1, :]))
        # index_word is the tokenizer's reverse mapping; it replaces a full
        # scan over word_index on every step.
        sampled_word = tokenizer.index_word.get(sampled_word_index)

        if sampled_word == 'end':
            break
        if sampled_word is not None:
            decoded_words.append(sampled_word)

        # Feed the prediction back in together with the updated states.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_word_index
        states_values = [h, c]

    # The terminator is never appended, so no `.replace(' end', '')` is needed
    # (that call also mangled real words beginning with "end").
    return "Bot:" + ''.join(' {}'.format(word) for word in decoded_words)

In [None]:
def generate_response(text):
    """Answer an Arabic question: tokenize it, decode a reply, drop literal markers.

    Any literal ``<START>`` / ``<END>`` substrings are removed from the
    decoded reply before it is returned.
    """
    reply = decode_response(str_to_tokens(text))
    for marker in ("<START>", "<END>"):
        reply = reply.replace(marker, '')
    return reply

In [None]:
# Smoke test: ask the Arabic chatbot one sample question; the reply is the cell output.
generate_response(" البلوزه رقم 21 كم سعرها")

Enter question :  البلوزه رقم 21 كم سعرها


'Bot: سعر البلوزه 4000'

# English model


In [None]:
def clean_text(text):
    """Sanitize free text for the English chatbot.

    ``text`` is converted with ``str()``; whitespace runs, HTML-like tags and
    every character outside the allowed set are each replaced by a space, and
    the remaining words are re-joined with single spaces. The allowed set is
    ASCII letters, characters in the range ``À``-``ž``, digits, apostrophe,
    period and comma (the stray ``-`` in the pattern also keeps hyphens).
    Case is left unchanged.
    """
    patterns = (
        r"\s+",                    # whitespace runs
        r"<[^>]+>",                # html tags
        r"[^A-Za-zÀ-ž-Z0-9'.,]",   # anything outside the allowed characters
    )
    cleaned = str(text)
    for pattern in patterns:
        cleaned = re.sub(pattern, " ", cleaned)
    return " ".join(cleaned.split())

[nltk_data] Downloading package omw-1.4 to C:\Users\Mohamed
[nltk_data]     Mousa\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
# English vocabulary: fitted on the questions and the answers together.
tokenizer_english = Tokenizer()
tokenizer_english.fit_on_texts(questions + answers)

# NOTE: this rebinds VOCAB_SIZE, which held the Arabic vocabulary size until now.
VOCAB_SIZE = len(tokenizer_english.word_index) + 1
print(f'VOCAB SIZE : {VOCAB_SIZE}')

VOCAB SIZE : 5817


In [None]:
# Sequence-length limits for the English chatbot.
# NOTE(review): presumably these equal the lengths used when the models were
# trained - confirm before changing.
maxlen_questions_english = 15
maxlen_answers_english = 80


def str_to_tokens_english(sentence: str):
    """Convert an English question into the padded index matrix fed to the encoder.

    The sentence is cleaned with ``clean_text`` and encoded with the fitted
    ``tokenizer_english``. ``texts_to_sequences`` is used instead of indexing
    ``word_index`` directly: a direct lookup raised ``KeyError`` for unknown
    words and for any word containing an upper-case letter or a character the
    tokenizer filters out. Out-of-vocabulary words are now skipped.

    Parameters
    ----------
    sentence : str
        Raw user question.

    Returns
    -------
    The result of ``pad_sequences`` for the single encoded sentence,
    post-padded to ``maxlen_questions_english``.
    """
    cleaned = clean_text(sentence)
    sequences = tokenizer_english.texts_to_sequences([cleaned])
    return pad_sequences(sequences, maxlen=maxlen_questions_english, padding='post')

In [None]:
def decode_response_english(text):
    """Greedy-decode an English reply from a tokenized question.

    ``enc_model_english`` turns the question into the initial decoder states.
    The decoder is then fed one token at a time, starting with the ``'start'``
    token, and the arg-max word of each step becomes the next input.

    Decoding stops at the ``'end'`` token or after
    ``maxlen_answers_english + 1`` decoder steps. The step cap guarantees
    termination: previously the loop only counted emitted words, so it could
    spin forever when the decoder kept predicting an index that has no word
    (e.g. the padding index 0).

    Parameters
    ----------
    text
        Padded token matrix as produced by ``str_to_tokens_english``.

    Returns
    -------
    str
        ``"Bot:"`` followed by every generated word, each preceded by a space.
    """
    states_values = enc_model_english.predict(text)
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer_english.word_index['start']

    decoded_words = []
    for _ in range(maxlen_answers_english + 1):
        dec_outputs, h, c = dec_model_english.predict([target_seq] + states_values)
        sampled_word_index = int(np.argmax(dec_outputs[0, -1, :]))
        # index_word is the tokenizer's reverse mapping; it replaces a full
        # scan over word_index on every step.
        sampled_word = tokenizer_english.index_word.get(sampled_word_index)

        if sampled_word == 'end':
            break
        if sampled_word is not None:
            decoded_words.append(sampled_word)

        # Feed the prediction back in together with the updated states.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_word_index
        states_values = [h, c]

    # The terminator is never appended, so no `.replace(' end', '')` is needed
    # (that call also turned e.g. " endless" into "less").
    return "Bot:" + ''.join(' {}'.format(word) for word in decoded_words)

In [None]:
def generate_response_english(text):
    """Answer an English question: tokenize it, decode a reply, drop literal markers.

    Any literal ``<START>`` / ``<END>`` substrings are removed from the
    decoded reply before it is returned.
    """
    reply = decode_response_english(str_to_tokens_english(text))
    for marker in ("<START>", "<END>"):
        reply = reply.replace(marker, '')
    return reply

In [None]:
# Smoke test: ask the English chatbot one sample question; the reply is the cell output.
generate_response_english('does this have a flip stand')

In [None]:
# Peek at the first few English training questions; displaying the whole
# list floods the notebook output.
questions[:10]

['does it fit nook glowlight',
 'does this have a flip stand',
 'does it work with macbook air mid2012',
 'does this work with mac mini',
 'does this item come with a charger',
 'does this nook play games',
 'does this model have an sd card slot',
 'can i download the kindle app for this',
 'can you download netflix',
 'is it user friendly',
 'can i play facebook games on it',
 'can i download the kindle app',
 'is this compatible with mac os 10.4.11',
 'can it play games for computer',
 'does it work with an imac',
 'will these batteries work in the bushnell range finder',
 'does the charger work for aa aaa batteries as well',
 'do you carry this product in white',
 'will this work in italy',
 'does it work for a gopro hero',
 'will this work for the gopro 4 silver',
 'does the adapter come with sd micro',
 'can you download save apps in this card',
 'does it work in other samsung phones',
 'will this work in a samsung s4 mini',
 'will this work in a samsung ace 3 s7275',
 'is possibl

In [None]:
def predict_language(text):
    """Predict the language label of ``text`` with the fitted ``pipe`` classifier.

    The classifier was trained on text that had been passed through
    ``removeSymbolsAndNumbers``, so the same cleaning is applied here to keep
    inference inputs consistent with the training data.

    Parameters
    ----------
    text : str
        Raw user message.

    Returns
    -------
    The predicted label for the single input (compared against ``"English"``
    by ``final_response``).
    """
    cleaned = removeSymbolsAndNumbers(text)
    lang = pipe.predict([cleaned])
    return lang[0]
def final_response(message):
    """Route ``message`` to the chatbot matching its detected language.

    Messages classified as ``"English"`` go to the English model; every other
    predicted label falls through to the Arabic model.
    """
    if predict_language(message) != "English":
        return generate_response(message)
    return generate_response_english(message)

In [None]:
# End-to-end smoke test: language detection followed by the matching chatbot.
final_response(" البلوزه رقم 21 كم سعرها")

In [None]:
from flask import Flask, render_template, request, redirect, url_for,jsonify
from flask_ngrok import run_with_ngrok

# Flask application; static files are served under the /static URL prefix.
app = Flask(__name__, static_url_path='/static') 
# Wrap the app with flask_ngrok: the server log below shows an ngrok.io URL once
# app.run() starts, i.e. the chatbot becomes reachable from outside this machine.
run_with_ngrok(app)

@app.route("/",methods=["get"])
def home():
    """Render and return the ``index.html`` template for the site root."""
    return render_template("index.html")
@app.route('/predict',methods=["Post"])
def predict():
    """Chat endpoint: read ``{"message": ...}`` and reply with ``{"answer": ...}``.

    Returns
    -------
    * 200 with the chatbot reply on success;
    * 400 when the body is not JSON or ``message`` is missing, not a string,
      or blank;
    * 500 (with the traceback logged) when the chatbot raises, so the client
      always receives a JSON ``answer`` instead of an HTML error page.
    """
    # silent=True returns None for missing or invalid JSON instead of raising.
    payload = request.get_json(silent=True)
    text = payload.get('message') if isinstance(payload, dict) else None
    if not isinstance(text, str) or not text.strip():
        return jsonify({"answer": "Please send a non-empty 'message' string."}), 400

    try:
        response = final_response(text)
    except Exception:
        # Request boundary: log the full traceback, then answer gracefully.
        app.logger.exception("Chatbot failed to answer a /predict request")
        return jsonify({"answer": "Sorry, I could not process that message."}), 500

    message = {"answer": response}
    return jsonify(message)
# Start the development server. This call blocks the cell until the server is
# stopped; the log below shows it listening on http://127.0.0.1:5000/.
app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [24/Oct/2022 18:22:45] "GET / HTTP/1.1" 200 -


 * Running on http://0d3f-197-46-84-209.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


127.0.0.1 - - [24/Oct/2022 18:22:53] "POST /predict HTTP/1.1" 200 -




127.0.0.1 - - [24/Oct/2022 18:23:21] "POST /predict HTTP/1.1" 200 -




127.0.0.1 - - [24/Oct/2022 18:24:19] "POST /predict HTTP/1.1" 200 -
[2022-10-24 18:25:46,781] ERROR in app: Exception on /predict [POST]
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\ProgramData\Anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\ProgramData\Anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\ProgramData\Anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\ProgramData\Anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\ProgramData\Anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Use



127.0.0.1 - - [24/Oct/2022 18:26:15] "POST /predict HTTP/1.1" 200 -




127.0.0.1 - - [24/Oct/2022 18:26:40] "POST /predict HTTP/1.1" 200 -




127.0.0.1 - - [24/Oct/2022 18:26:55] "POST /predict HTTP/1.1" 200 -




127.0.0.1 - - [24/Oct/2022 18:27:09] "POST /predict HTTP/1.1" 200 -




127.0.0.1 - - [24/Oct/2022 18:27:27] "POST /predict HTTP/1.1" 200 -
