# Import Libraries

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

In [3]:
# Load dataset
#
# Only two columns of spam.csv are needed: 'v1' (the class label) and 'v2'
# (the message text). Passing them as `usecols` lets the parser skip any other
# column in the file instead of loading it and throwing it away afterwards.
# `usecols` does not control column order, so the explicit selection keeps the
# label-then-message layout. Renaming by name rather than by position keeps
# each header attached to the right column.
# NOTE(review): latin-1 is presumably needed because the file is not valid
# UTF-8 -- confirm against the data source.

df = pd.read_csv("spam.csv", encoding='latin-1', usecols=['v1', 'v2'])
df = df[['v1', 'v2']].rename(columns={'v1': 'label', 'v2': 'message'})
df

Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will Ì_ b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


In [4]:
# Encode labels
#
# Map the text labels to integers (ham -> 0, spam -> 1). Series.map() turns
# every value that is missing from the mapping into NaN without complaint, so
# the result is checked before it replaces the column. This catches unexpected
# label values as well as an accidental second run of this cell (the column
# would already hold 0/1 and every row would map to NaN).

label_codes = {'ham': 0, 'spam': 1}
encoded_labels = df['label'].map(label_codes)
unmapped = encoded_labels.isna()
if unmapped.any():
    unexpected = sorted(df.loc[unmapped, 'label'].astype(str).unique())
    raise ValueError(
        f"Cannot encode labels; expected only {sorted(label_codes)}, "
        f"found {unexpected}"
    )
# With no NaN left the values are whole numbers; keep them as integers.
df['label'] = encoded_labels.astype('int64')

In [5]:
# Split data
#
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the split, repeatable from run to run.

features = df['message']
targets = df['label']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Vectorize text: convert the messages into numerical TF-IDF features
#
# The vectorizer is fitted on the training messages only and then applied,
# unchanged, to the test messages. English stop words are dropped.

vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf, X_test_tfidf = (
    vectorizer.fit_transform(X_train),  # evaluated first: fits, then transforms
    vectorizer.transform(X_test),       # reuses the fitted vectorizer
)

In [7]:
# Train SVM classifier
#
# Fit a linear support vector classifier, left at its default settings, on the
# TF-IDF features of the training messages. The fit call stays a bare
# expression so a notebook still displays the fitted estimator.

model = LinearSVC()
model.fit(X=X_train_tfidf, y=y_train)


LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [8]:
# Predict & Evaluate
#
# Score the classifier on the held-out messages: overall accuracy first, then
# the per-class precision / recall / F1 report, with the two classes shown
# under the display names "Ham" and "Spam".

y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=["Ham", "Spam"])
print("Accuracy:", accuracy)
print(report)

Accuracy: 0.97847533632287
              precision    recall  f1-score   support

         Ham       0.98      0.99      0.99       965
        Spam       0.96      0.87      0.92       150

    accuracy                           0.98      1115
   macro avg       0.97      0.93      0.95      1115
weighted avg       0.98      0.98      0.98      1115



# Test The Model

In [15]:
# Hand-written sample messages for a quick sanity check of the trained model.
test_emails = [
    "Get cheap loans approved instantly, no credit check!",
    "Can we reschedule our meeting for next Monday?",
    "Congratulations! You've won a $1000 Walmart gift card. Click here to claim now.",
]

# Reuse the already-fitted vectorizer so the samples get the same feature
# columns the model was trained on.
test_vec = vectorizer.transform(test_emails)
predictions = model.predict(test_vec)

for email, pred in zip(test_emails, predictions):
    # A prediction of 1 is reported as spam; anything else as not spam.
    verdict = 'Spam' if pred == 1 else 'Not Spam'
    print(f"Email: '{email}' --> {verdict}")

Email: 'Get cheap loans approved instantly, no credit check!' --> Not Spam
Email: 'Can we reschedule our meeting for next Monday?' --> Not Spam
Email: 'Congratulations! You've won a $1000 Walmart gift card. Click here to claim now.' --> Spam
