In [None]:
import pandas as pd
import pickle
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import warnings
warnings.filterwarnings('ignore')



In [None]:
with open('data/classical_results.pkl', 'rb') as f:
    classical = pickle.load(f)
    tfidf = classical['tfidf']
    nb_model = classical['nb_model']
    svm_model = classical['svm_model']
    lr_model = classical['lr_model']

tokenizer = BertTokenizer.from_pretrained('./bert_sms_spam')
model = BertForSequenceClassification.from_pretrained('./bert_sms_spam')

In [None]:
with open('data/all_results.pkl', 'rb') as f:
    data = pickle.load(f)
    results = data['results']

comparison_df = pd.DataFrame({
    'ModÃ¨le': list(results.keys()),
    'Accuracy': [v['accuracy'] for v in results.values()]
})
best_model = comparison_df.loc[comparison_df['Accuracy'].idxmax(), 'ModÃ¨le']

In [22]:
def predict_sms(message, model_name='BERT'):
    if model_name == 'BERT':
        encoding = tokenizer(message, truncation=True, padding='max_length', max_length=128, return_tensors='pt')
        with torch.no_grad():
            outputs = model(**encoding)
            proba = torch.softmax(outputs.logits, dim=1)[0, 1].item()
            prediction = 'SPAM' if proba > 0.5 else 'HAM'
    else:
        msg_tfidf = tfidf.transform([message])
        if model_name == 'Naive Bayes':
            proba = nb_model.predict_proba(msg_tfidf)[0, 1]
        elif model_name == 'SVM':
            proba = svm_model.predict_proba(msg_tfidf)[0, 1]
        else:  # Logistic Regression
            proba = lr_model.predict_proba(msg_tfidf)[0, 1]
        prediction = 'SPAM' if proba > 0.5 else 'HAM'

    return prediction, proba

# Tests d'exemples
test_messages = [
    "WINNER!! You have won a $1,000 prize! Call now to claim!",
    "Hey, are we still meeting for lunch tomorrow?",
    "Congratulations! You've been selected for a FREE vacation!",
    "Can you pick up some milk on your way home?"
]

print("\nðŸ“§ Test de messages:")
for msg in test_messages:
    pred, proba = predict_sms(msg, best_model)
    print(f"\nMessage: {msg[:60]}...")
    print(f"PrÃ©diction: {pred} (Confiance: {proba:.2%})")


ðŸ“§ Test de messages:

Message: WINNER!! You have won a $1,000 prize! Call now to claim!...
PrÃ©diction: SPAM (Confiance: 95.00%)

Message: Hey, are we still meeting for lunch tomorrow?...
PrÃ©diction: HAM (Confiance: 0.04%)

Message: Congratulations! You've been selected for a FREE vacation!...
PrÃ©diction: HAM (Confiance: 1.72%)

Message: Can you pick up some milk on your way home?...
PrÃ©diction: HAM (Confiance: 0.05%)
