In [31]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
import random
from flask import Flask, render_template, request
import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MaxAbsScaler
from nltk.corpus import stopwords
import re
from itertools import groupby
from pyarabic.araby import *
import pickle

In [32]:
#preprocessing for dataset

#removing arabic stopwords
stop_words = stopwords.words('arabic') 
def filter_text(text):
    
    """
    Function to filter text
    
    Parameters:
      * text(string): text that is filtered from special characters and others
      
    Return text(string): filtered text using regular expression
    """
    #filter text by removing special characters -> not arabic words or numbers -> trim spcases -> removing stopwords and others
    filtered_text = " ".join([word for word in text.split(' ') if word not in stop_words])
    filtered_text = " ".join(re.findall('[\u0600-\u06ff]+',filtered_text))
    filtered_text = re.sub('\s+',' ',re.sub('[٠-٩؟،]','',filtered_text))
    filtered_text = re.sub("[إأآا]", "ا", filtered_text)
    filtered_text = "".join(c for c, _ in groupby(filtered_text))
    filtered_text = strip_tashkeel(filtered_text)
    filtered_text = strip_lastharaka(filtered_text)
    filtered_text = strip_tatweel(filtered_text)

    return filtered_text

In [55]:
#predict from ML model

#load vectors of transformation
counter   = joblib.load('vectors/count_vector.pkl')
scaler    = joblib.load('vectors/scaler.pkl')
ml_model  = joblib.load('models/dialect_ml_model.pkl')


In [56]:
def ret_ml_prediction(text):
    return ml_model.predict(scaler.transform(counter.transform([filter_text(text)])))[0]

In [76]:
#predict from DL model
word2idx = joblib.load('vectors/dict_word2idx.sav')
dl_model = load_model('models/dialect_dl_model.h5')
encoder  = joblib.load('vectors/encoder.pkl') 

In [53]:
max_sequence_len = 100
def convert_text_to_vector(text):
    
  vector = np.zeros((1,max_sequence_len))
  text_ls = text.split(' ')
  for i in range(len(text_ls)):
    vector[0,i] = word2idx.get(text_ls[i],0)

  return list(vector[0])

In [109]:
def ret_dl_prediction(text):
    
    vec = np.array(convert_text_to_vector(text)).reshape(-1,1).T
    prediction = np.argmax(dl_model.predict(vec))
    
    return encoder.inverse_transform([[prediction]])[0][0]

In [115]:
app = Flask(__name__)

@app.route('/')
def form():
    return render_template('public/dialect.html')


@app.route("/insert-text", methods=["GET", "POST"])
def insert_text():

    if request.method == "POST":
        
        text = request.form['text']

        try:
            model_option = int(request.form.getlist('options')[0])
            
            print(model_option)
            if model_option == 1:
                value = ret_ml_prediction(text.strip())
            else:
                value = ret_dl_prediction(text.strip())


            #----------------------------------------------

            return render_template("public/dialect.html",value = value)

            #----------------------------------------------
        except:
            print('not selected')
        return redirect(request.url)


    
if __name__ == '__main__':
    app.run(debug=False,port=5001)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5001/ (Press CTRL+C to quit)
127.0.0.1 - - [14/Mar/2022 07:31:16] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [14/Mar/2022 07:31:24] "[37mPOST /insert-text HTTP/1.1[0m" 200 -


1


127.0.0.1 - - [14/Mar/2022 07:31:32] "[37mPOST /insert-text HTTP/1.1[0m" 200 -


2
