In [None]:
!pip install evaluate

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer
# Download (or load from the local cache) the SMS spam corpus.
raw_data = load_dataset("sms_spam")

# Uncomment to inspect a single record:
# print(raw_data['train'][0])

# Tokenizer for the same checkpoint that is fine-tuned further down.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


def preprocess_function(examples):
    """Tokenize the ``sms`` field of a batch of examples.

    Every message is truncated and padded to exactly 128 tokens so that all
    encoded rows have the same length.

    Args:
        examples: Mapping with an ``'sms'`` entry holding the message text(s).

    Returns:
        Whatever the module-level ``tokenizer`` returns for those texts.
    """
    messages = examples['sms']
    encoded = tokenizer(
        messages,
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    return encoded




# Apply the tokenizer to every split; batched=True hands the function a
# batch of rows per call instead of one row at a time.
tokenized_datasets = raw_data.map(
    preprocess_function,
    batched=True,
)



In [None]:
from transformers import AutoModelForSequenceClassification
model_checkpoint = 'bert-base-uncased'

# Class id -> human-readable name. Id 1 is the spam class (the prediction
# helper later in this notebook treats predicted id 1 as spam).
id2label = {0: "Ham", 1: "spam"}
# The config expects label2id to map name -> id, i.e. the inverse of id2label.
# Passing an id -> name dict under `label2id` stores a malformed mapping.
label2id = {name: idx for idx, name in id2label.items()}

num_labels = len(id2label)


# Pretrained encoder with a sequence-classification head sized to num_labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
from transformers import TrainingArguments,Trainer
import numpy as np
import evaluate
# from evaluate import accuracy

# Hyper-parameters for the fine-tuning run.
arg = TrainingArguments(
    output_dir="bert_SpamDetector",   # where Trainer writes its outputs
    eval_strategy="epoch",            # evaluate once per epoch
    num_train_epochs=3,
    per_device_train_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=10,                 # log every 10 optimizer steps
)


# Hold out 20% of the tokenized 'train' split for evaluation. A fixed seed
# makes the split identical on every run, so evaluation numbers are
# comparable between runs instead of depending on a random shuffle.
split_dataset = tokenized_datasets['train'].train_test_split(test_size=0.2, seed=42)

# Accuracy metric used by compute_metrics during evaluation.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level ``metric``.

    Args:
        eval_pred: A pair ``(logits, labels)``.

    Returns:
        The result of ``metric.compute`` for the arg-max class of each row
        of logits compared against ``labels``.
    """
    scores, gold_labels = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    predicted_ids = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_ids, references=gold_labels)



# Wire the model, hyper-parameters, data splits and metric together.
trainer = Trainer(
    model=model,
    args=arg,
    train_dataset=split_dataset['train'],
    eval_dataset=split_dataset['test'],
    compute_metrics=compute_metrics,
)

# Run the fine-tuning loop.
trainer.train()




In [None]:
import torch
import torch.nn.functional as F

def predict_message(text):
    """Classify a single message as spam or not spam.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text: The message to classify.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is a display string and
        ``confidence`` is the softmax probability (between 0 and 1) of the
        predicted class.
    """
    # Truncate to the same 128-token limit used when tokenizing the training
    # data, so an overly long message cannot exceed the model's maximum
    # sequence length.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=128,
    ).to(model.device)

    # Make sure dropout is disabled for inference regardless of the mode the
    # model was left in.
    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits

    probs = F.softmax(logits, dim=-1)

    # Arg-max over the class axis of the single row in the batch.
    pred_id = torch.argmax(probs, dim=-1)[0].item()
    confidence = probs[0][pred_id].item()

    if pred_id == 1:
        label = "SPAM  (Alert!!!!!!!!)"
    else:
        label = " Not Spam (fair message)"

    return label, confidence

print("--- SPAM DETECTOR---")
print("Type 'quit' to exit.")

# Simple interactive loop: read a message, classify it, print the verdict.
while True:

    # Strip surrounding whitespace so " quit " still exits.
    user_text = input("\nEnter a message: ").strip()

    if user_text.lower() == 'quit':
        break

    # Nothing to classify on a blank line; prompt again.
    if not user_text:
        continue

    prediction, confidence = predict_message(user_text)

    print(f"Result: {prediction}")
    # `confidence` is a probability between 0 and 1; the `%` format spec
    # multiplies by 100, so 0.92 is shown as "92.00%" instead of "0.92%".
    print(f"Confidence: {confidence:.2%}")

--- SPAM DETECTOR---
Type 'quit' to exit.

Enter a message: URGENT! You have won a $1000 cash prize. Call 0800-111-222 to claim now!"
Result: SPAM  (Alert!!!!!!!!)
Confidence: 1.00%

Enter a message: Click this link http://win-money.com to verify your account."
Result: SPAM  (Alert!!!!!!!!)
Confidence: 1.00%

Enter a message: FREE ENTRY in our weekly competition."
Result:  Not Spam (fair message)
Confidence: 0.92%

Enter a message: quit
