In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Toy corpus (swap in a real dataset here). Each entry pairs an email body
# with its label so the two can never drift out of alignment:
# 1 means spam, 0 means not spam.
labeled_emails = [
    ("Congratulations, you have won a lottery! Click here to claim your prize.", 1),
    ("Dear customer, your account has been suspended. Please verify your details.", 1),
    ("Hey, are we still on for lunch tomorrow?", 0),
    ("This is a friendly reminder about your upcoming appointment.", 0),
    ("Get cheap loans now, no credit check required!", 1),
    ("Meeting rescheduled to 3 PM. See you then.", 0),
    ("Limited time offer! Buy now and save 50%", 1),
    ("Don't forget to submit your report by end of day.", 0),
    ("Your package has been shipped. Track your order here.", 0),
    ("Urgent! Your bank account needs verification.", 1),
]

# Column-oriented view of the same records, in the original order.
data = {
    'EmailText': [text for text, label in labeled_emails],
    'Label': [label for text, label in labeled_emails],
}

# Tabular form of the corpus: one row per email.
df = pd.DataFrame(data)

# Model input (raw text) and target (0/1 label) as separate Series.
X = df['EmailText']
y = df['Label']

# Hold out 20% of the rows for evaluation. The split is seeded so it is
# repeatable, and stratified on the label so train and test keep the same
# spam / not-spam proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# End-to-end text classifier: raw strings in, 0/1 predictions out.
pipeline = Pipeline(
    steps=[
        # Turn each email into a sparse vector of token counts.
        ('vectorizer', CountVectorizer()),
        # Rescale each feature without centering; with_mean=False is needed
        # because centering is not supported on sparse input.
        ('scaler', StandardScaler(with_mean=False)),
        # Linear-kernel support vector classifier on the scaled counts.
        ('svm', SVC(kernel='linear', random_state=42)),
    ]
)

# Fit every pipeline stage on the training split only, then label the
# held-out emails. fit() returns the fitted pipeline, so the two steps chain.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Score the held-out predictions, reporting each metric under its own heading.

# Fraction of test emails labelled correctly.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Counts of true labels (rows) against predicted labels (columns).
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Per-class precision, recall, F1 and support, plus the averaged rows.
class_report = classification_report(y_test, y_pred)
print("Classification Report:", class_report, sep="\n")


Accuracy: 1.0
Confusion Matrix:
[[1 0]
 [0 1]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

