In [None]:
pip install artifex

In [None]:
from artifex import Artifex
from sklearn.metrics import accuracy_score, classification_report

# Initialize the spam detection model
spam_detection = Artifex().spam_detection

# Sample dataset: 10 records, each written next to its true label
# ("spam" for spam messages, "ham" for non-spam messages)
labelled_examples = [
    ("Congratulations! You've won a $1000 gift card. Click here to claim!", "spam"),
    ("Hi John, I received your email. I'll get back to you soon.", "ham"),
    ("Limited time offer! Get 50% off on your next purchase!", "spam"),
    ("Can we meet tomorrow for the report discussion?", "ham"),
    ("You have an exclusive chance to win an iPhone 14! Act now!", "spam"),
    ("Looking forward to the meeting at 10 AM tomorrow.", "ham"),
    ("Hurry up! Last chance to claim your free vacation package!", "spam"),
    ("Please find attached the monthly sales report.", "ham"),
    ("You’ve been selected to get a $500 gift card. Don’t miss out!", "spam"),
    ("Thank you for your feedback on the project. I’ll make the changes.", "ham"),
]

# Parallel lists derived from the pairs above: message texts and their
# true labels, in the same order
messages = [text for text, _ in labelled_examples]
true_labels = [tag for _, tag in labelled_examples]

# Predicted label for each message, in the same order as `messages`
predicted_labels = []

# Separator line printed after each message's debug output
divider = "-" * 50

# Classify every message in turn
for message in messages:
    spam_sentence = spam_detection(message)

    # Only the first entry of the response is used for the decision
    top_result = spam_sentence[0]
    score = top_result.score
    label = top_result.label

    # Threshold logic: a message counts as spam only when it is labelled
    # "spam" AND the score is above 0.9; everything else is treated as ham
    if label == "spam" and score > 0.9:
        predicted_label, verdict = "spam", "The message is spam"
    else:
        predicted_label, verdict = "ham", "The message is ham"

    print(f"Message: {message}")
    print(verdict)

    predicted_labels.append(predicted_label)

    # Print the full classification response for debugging
    print(f"ClassificationResponse: {spam_sentence}\n" + divider)

# Compare predictions against the true labels: overall accuracy first,
# then the per-class classification report
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"\nAccuracy: {accuracy:.2f}")

print("\nClassification Report:")
report = classification_report(true_labels, predicted_labels, target_names=["ham", "spam"])
print(report)


In [None]:
pip install numpy pandas scikit-learn

In [None]:
import string
import unicodedata

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Sample dataset: each record is a (message, label) pair, where the label
# is either 'spam' or 'ham'
labelled_rows = [
    ("Congratulations! You've won a $1000 gift card. Click here to claim!", 'spam'),
    ("Hi John, I received your email. I'll get back to you soon.", 'ham'),
    ("Limited time offer! Get 50% off on your next purchase!", 'spam'),
    ("Can we meet tomorrow for the report discussion?", 'ham'),
    ("You have an exclusive chance to win an iPhone 14! Act now!", 'spam'),
    ("Looking forward to the meeting at 10 AM tomorrow.", 'ham'),
    ("Hurry up! Last chance to claim your free vacation package!", 'spam'),
    ("Please find attached the monthly sales report.", 'ham'),
    ("You’ve been selected to get a $500 gift card. Don’t miss out!", 'spam'),
    ("Thank you for your feedback on the project. I’ll make the changes.", 'ham'),
]

# Column-oriented view of the same records
data = {
    'message': [text for text, _ in labelled_rows],
    'label': [tag for _, tag in labelled_rows],
}

# Step 1: Create a pandas DataFrame from the dataset
df = pd.DataFrame(data)

# Step 2: Text Preprocessing
def preprocess_text(text):
    """Lowercase ``text`` and strip every punctuation character from it.

    In addition to the ASCII characters listed in ``string.punctuation``,
    any character whose Unicode category starts with ``P`` is removed. This
    covers typographic quotes, dashes and ellipses, so that "You’ve" and
    "You've" both normalise to ``youve`` instead of producing different
    tokens for the same word.

    Args:
        text: The raw message string.

    Returns:
        The lowercased string with punctuation removed. Whitespace and all
        other characters are left untouched.
    """
    # Convert text to lowercase
    text = text.lower()

    # Remove punctuation: ASCII punctuation/symbols (including '$', '%', ...)
    # plus any Unicode punctuation such as ’ “ ” — …
    ascii_punctuation = set(string.punctuation)
    return "".join(
        ch
        for ch in text
        if ch not in ascii_punctuation
        and not unicodedata.category(ch).startswith("P")
    )

# Apply preprocessing to all messages
df['cleaned_message'] = df['message'].apply(preprocess_text)

# Step 3: Label encoding (spam -> 1, ham -> 0)
y = np.array([1 if label == 'spam' else 0 for label in df['label']])

# Step 4: Split the cleaned text into training and testing sets.
# The split is done BEFORE vectorization so the test messages cannot
# influence the learned vocabulary (no train/test leakage). `stratify=y`
# keeps the spam/ham ratio in both subsets, so the test split contains
# both classes.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_message'], y, test_size=0.2, random_state=42, stratify=y
)

# Step 5: Convert text to numerical features using CountVectorizer (Bag of Words).
# The vocabulary is fitted on the training messages only; the test messages
# are merely transformed with it.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Feature matrix for the whole dataset, expressed in the training vocabulary.
# Kept so that any code referring to `X` continues to work.
X = vectorizer.transform(df['cleaned_message'])

# Step 6: Train the Naive Bayes classifier
model = MultinomialNB()
model.fit(X_train, y_train)

# Step 7: Make predictions on the test set
y_pred = model.predict(X_test)

# Step 8: Evaluate the model's performance
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("Classification Report:")
# `labels=[0, 1]` pins the row order to (ham, spam) and keeps the report
# valid even if a class is absent from y_test/y_pred; `zero_division=0`
# reports 0 instead of warning when a class has no predicted samples.
print(
    classification_report(
        y_test,
        y_pred,
        labels=[0, 1],
        target_names=['ham', 'spam'],
        zero_division=0,
    )
)

# Step 9: Test with a new message
def predict_message(message):
    """Classify a single raw message as ``"spam"`` or ``"ham"``.

    The message is cleaned with ``preprocess_text``, turned into a feature
    vector by the module-level ``vectorizer`` and passed to the module-level
    ``model``.

    Args:
        message: The raw message text.

    Returns:
        ``"spam"`` when the model predicts class 1, otherwise ``"ham"``.
    """
    # The vectorizer expects an iterable of documents, hence the
    # one-element list
    features = vectorizer.transform([preprocess_text(message)])

    # predict() returns one label per input row; there is exactly one row
    predicted_class = model.predict(features)[0]
    if predicted_class == 1:
        return "spam"
    return "ham"

# Try the classifier on a custom message
test_message = "Congratulations! You won a free iPhone 14, claim it now!"
print(f"Prediction for message: '{test_message}' is {predict_message(test_message)}")

# Step 10: Predict all messages in the dataset.
# Note: this loop runs over every row of df['message'], i.e. the whole
# dataset rather than only the held-out test rows.
print("\nPredictions for all messages in the dataset:")
for message in df['message']:
    predicted_label = predict_message(message)
    # One print call with a newline separator emits the same three lines
    print(
        f"Message: {message}",
        f"Predicted Label: {predicted_label}",
        "-"*50,
        sep="\n",
    )
