In [1]:
from flask import Flask, request, json, render_template
import tensorflow
import numpy as np
import joblib

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC 

from sklearn import preprocessing

import nltk 
# nltk.download('all')
from nltk.corpus import stopwords, wordnet 
from nltk.tokenize import word_tokenize
from nltk.tokenize import RegexpTokenizer

In [2]:
import os
import re
import sys

In [3]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from transformers import TFAutoModelForSequenceClassification

In [4]:
# Move up to the project root so the relative paths used later ("Scripts",
# "MARBERT/...", "ClassificationModel/...") resolve.
# The guard makes the cell idempotent: an unconditional chdir("..") climbs one
# more directory every time the cell is re-run. It assumes the notebook's own
# starting directory has no "Scripts" folder — TODO confirm.
if not os.path.isdir("Scripts"):
    os.chdir("..")

In [5]:
# Sanity check: show the entries of the current working directory.
os.listdir(os.curdir)

['ClassificationModel',
 'Datasets',
 'env.yml',
 'Machine Learning Task.pdf',
 'MARBERT',
 'model',
 'Notebooks',
 'requirements.txt',
 'Scripts',
 'templates',
 '__pycache__']

In [6]:
# Add the helper directories to the import search path. The entries are
# relative, so they are resolved against the current working directory.
# Each entry is added only once so that re-running this cell does not pile up
# duplicate sys.path entries.
for extra_dir in ("Scripts", "dialects", "templates"):
    if extra_dir not in sys.path:
        sys.path.append(extra_dir)

In [7]:
# Project-local helpers (presumably found through the sys.path entries added in
# the previous cell).
# NOTE(review): the wildcard import hides where names come from. The cells below
# use PreprocessTweets and TweetsTokenizing, which nothing else imports, so they
# presumably come from this module — consider importing them explicitly.
from preprocess_and_tokenize import *
# Lookup used below to turn MARBERT's predicted class index into a label.
from dialects import dialects_dict

---
## Model Loading (MARBERT tokenizer and classifier, SVM pipeline)

In [8]:
# Checkpoint name shared by the tokenizer and the classifier architecture.
MARBERT_CHECKPOINT = "UBC-NLP/MARBERT"
# Number of output classes of the classification head
# (presumably one per entry of dialects_dict — TODO confirm).
NUM_DIALECT_LABELS = 18

tokenizer = AutoTokenizer.from_pretrained(MARBERT_CHECKPOINT)

# Build the classifier from the public checkpoint first, then load the weights
# stored in the local MARBERT folder on top of it.
marebert_model = TFAutoModelForSequenceClassification.from_pretrained(
    MARBERT_CHECKPOINT,
    num_labels=NUM_DIALECT_LABELS,
)
marebert_model.load_weights("MARBERT/tf_model.h5")

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load pipeline files from a trusted source.
svm_pipeline = joblib.load("ClassificationModel/classification_pipeline.pkl")

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


## Flask API

In [35]:
# Flask application object; the route decorators below register views on it.
app = Flask(import_name=__name__)


@app.route("/", methods=["GET"])
def index():
    """Render the ``index.html`` template without any prediction values."""
    landing_page = render_template("index.html")
    return landing_page

@app.route("/prediction", methods=["POST"])
def prediction():
    """Classify the submitted text with both models and re-render the page.

    The first field of the posted form is taken as the text to classify. It is
    run through the shared preprocessing pipeline, then predicted once with the
    SVM pipeline and once with the MARBERT classifier. Both results are passed
    to ``index.html`` as ``prediction_ml`` and ``prediction_dl``.

    Returns:
        The rendered ``index.html`` page, or a plain-text message with HTTP
        status 400 when the posted form contains no field at all.
    """
    # Request parsing: take the first form value. A default is used so that an
    # empty form yields a 400 response instead of an IndexError (HTTP 500).
    text_inp = next(iter(request.form.values()), None)
    if text_inp is None:
        return "No text was submitted.", 400

    preprocessed_text = PreprocessTweets(text_inp).preprocessing_pipeline()

    # Tokenization is done twice, once for the SVM and once for MARBERT.
    tokenized_for_ml = TweetsTokenizing(preprocessed_text).tokenize_pipeline()
    tokenized_for_bert = tokenizer.encode(
        preprocessed_text, truncation=True, padding=True, return_tensors="tf"
    )

    # SVM: the pipeline receives a one-element batch holding the string form
    # of the tokenized text; [0] picks the prediction for that single sample.
    svm_label = svm_pipeline.predict([f"{tokenized_for_ml}"])[0]

    # MARBERT: first element of the model output (presumably the per-class
    # scores), argmax over the class axis, then mapped through dialects_dict.
    class_scores = marebert_model.predict(tokenized_for_bert)[0]
    bert_label = dialects_dict[np.argmax(class_scores, axis=1)[0]]

    ml_predict = f"SVM Prediction: {svm_label}"
    bert_predict = f"MARBERT Prediction: {bert_label}"

    # Rendering with predictions
    return render_template("index.html", prediction_ml=ml_predict, prediction_dl=bert_predict)

# Start Flask's built-in server; this call blocks the cell until interrupted.
# The reloader is switched off (presumably because it does not work from
# inside a notebook kernel — TODO confirm).
# NOTE(review): debug=True turns on debug mode; keep it for local use only and
# never expose this server beyond localhost.
app.run(use_reloader=False, debug=True)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [13/Mar/2022 14:54:56] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [13/Mar/2022 14:55:23] "[37mPOST /prediction HTTP/1.1[0m" 200 -
127.0.0.1 - - [13/Mar/2022 14:55:43] "[37mPOST /prediction HTTP/1.1[0m" 200 -
127.0.0.1 - - [13/Mar/2022 14:56:33] "[37mPOST /prediction HTTP/1.1[0m" 200 -
127.0.0.1 - - [13/Mar/2022 14:58:20] "[37mPOST /prediction HTTP/1.1[0m" 200 -
127.0.0.1 - - [13/Mar/2022 14:59:43] "[37mPOST /prediction HTTP/1.1[0m" 200 -
127.0.0.1 - - [13/Mar/2022 14:59:57] "[37mPOST /prediction HTTP/1.1[0m" 200 -
