In [2]:
from flask import Flask, render_template, request, jsonify
import pickle
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
import nltk
import stanza

nltk.download("stopwords")

stop_words_fr = stopwords.words('french')
nlp = stanza.Pipeline("fr", processors="tokenize,mwt,pos,lemma")

def normalize(text):
    doc = nlp(text)
    lemmatized_filtered_text = [word.lemma.lower() for sentence in doc.sentences for word in sentence.words if word.lemma.lower() not in stop_words_fr]
    return " ".join(lemmatized_filtered_text)

with open('svm.pkl', 'rb') as file:
    svm = pickle.load(file)

with open('naivebayes.pkl', 'rb') as file:
    nb = pickle.load(file)

with open("vectorizer.pkl", "rb") as file:
    vectorizer= pickle.load(file)

app = Flask(__name__)

bdd = pd.read_excel("BDD_normalisé.xlsx")


def vectorize(text):
    text = normalize(text)
    vect_text = vectorizer.transform([text])
    return vect_text

@app.route("/")
def index():
    return render_template("index.html")

@app.route('/predict', methods = ['POST'])
def predict():

    email_content = request.form.get('email')
    model = request.form.get('model')

    if not email_content or not model:
        return jsonify({'error': 'Email content or model selection is missing'}), 400
    
    vect_email = vectorize(email_content)

    if model == "svm":
        prediction = svm.predict(vect_email)[0]
        pred_result = 'SPAM' if prediction == 1 else 'NOT SPAM'
    elif model == "nb":
        prediction = nb.predict(vect_email)[0]
        pred_result = 'SPAM' if prediction == 1 else 'NOT SPAM'

    return jsonify({'prediction': pred_result})

if __name__ == '__main__':
    app.run(host='127.0.0.1', port=8000, debug=True)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
2025-04-24 23:49:33 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.10.0.json: 426kB [00:00, 6.75MB/s]                    
2025-04-24 23:49:33 INFO: Downloaded file to C:\Users\pc\stanza_resources\resources.json
2025-04-24 23:49:34 INFO: Loading these models for language: fr (French):
| Processor | Package           |
---------------------------------
| tokenize  | combined          |
| mwt       | combined          |
| pos       | combined_charlm   |
| lemma     | combined_nocharlm |

2025-04-24 23:49:34 INFO: Using device: cpu
2025-04-24 23:49:34 INFO: Loading: tokenize
2025-04-24 23:49:34 INFO: Loading: mwt
2025-04-24 23:49:34 INFO: Loading: pos
2025-04-24 23:49:36 INFO: Loading: lemma
2025-04-24 23:49:37 INFO: Done loading processors!


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:8000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
