In [13]:
from flask import Flask, request, jsonify, render_template
import joblib
from tensorflow.keras.models import load_model # type: ignore
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import numpy as np
import pandas as pd

# Initialize Flask app
app = Flask(__name__)

# Load pretrained model and vectorizer
model = load_model("paraphrase_model.h5")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

# Preprocessing function
stop_words = set(stopwords.words("english"))
def preprocess_text(text):
    if pd.isnull(text):
        return ""
    tokens = nltk.word_tokenize(text.lower())
    filtered_tokens = [token for token in tokens if token.isalnum() and token not in stop_words]
    return " ".join(filtered_tokens)

# Route for homepage
@app.route("/")
def home():
    return render_template("index.html")

# Route for prediction
@app.route("/predict", methods=["POST"])
def predict():
    # Get sentences from the form
    sentence1 = request.form.get("sentence1", "")
    sentence2 = request.form.get("sentence2", "")

    # Preprocess and vectorize the sentences
    sentence1 = preprocess_text(sentence1)
    sentence2 = preprocess_text(sentence2)
    q1_vector = vectorizer.transform([sentence1])
    q2_vector = vectorizer.transform([sentence2])
    input_data = np.hstack((q1_vector.toarray(), q2_vector.toarray()))

    # Predict using the model
    prediction = model.predict(input_data)
    result = "Paraphrase" if prediction[0][0] > 0.5 else "Not a Paraphrase"
    print(f"Prediction value: {prediction[0][0]}")
    print(f"Vectorizer feature count: {vectorizer.get_feature_names_out().shape[0]}")

    return jsonify({"result": result})

if __name__ == "__main__":
    app.run(debug=True, use_reloader=False)




 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:31:35] "GET / HTTP/1.1" 200 -


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 293ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:32:19] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.5832339525222778
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:32:25] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.5832339525222778
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:50:26] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.09853899478912354
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:50:35] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.1193840280175209
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:50:41] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.15828634798526764
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:50:42] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.15828634798526764
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:50:43] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.15828634798526764
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:55:18] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.07747706770896912
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:55:23] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.07747706770896912
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:55:30] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.13552002608776093
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:55:38] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.20224250853061676
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:55:46] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.26879143714904785
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:55:52] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.26879143714904785
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:56:01] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.26879143714904785
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:56:02] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.26879143714904785
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:56:08] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.3092224597930908
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:56:13] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.3292391002178192
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:56:25] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.09853899478912354
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:56:32] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.09853899478912354
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 21:56:38] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.5832339525222778
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 22:03:36] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.46842285990715027
Vectorizer feature count: 10000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step


INFO:werkzeug:127.0.0.1 - - [24/Nov/2024 22:03:44] "POST /predict HTTP/1.1" 200 -


Prediction value: 0.5832339525222778
Vectorizer feature count: 10000
