# Import Libraries & Dataset

In [12]:
import os

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [3]:
# Load the labelled message dataset; the file is decoded as Latin-1.
data = pd.read_csv(
    "dataset/spam.csv",
    encoding="latin-1",
)

In [4]:
# Preview the first five rows to check the column layout.
data.head(n=5)

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


# Data Preprocessing

In [5]:
# 'v2' holds the message text (features); 'v1' holds the ham/spam label (target).
X, y = data["v2"], data["v1"]

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Learn the TF-IDF vocabulary/weights from the training text only, then reuse
# the fitted vectorizer on the test text so no test information leaks into it.
# The tuple is evaluated left to right, so fitting happens before the test transform.
vectorizer = TfidfVectorizer()
X_train_transformed, X_test_transformed = (
    vectorizer.fit_transform(X_train),
    vectorizer.transform(X_test),
)

# Train and Test the Model

In [8]:
# Support-vector classifier with the library's default hyperparameters.
classifier = SVC()
# fit() is left as the last expression so the fitted estimator is displayed.
classifier.fit(X=X_train_transformed, y=y_train)

SVC()

In [9]:
# Predict labels for the held-out messages.
y_pred = classifier.predict(X=X_test_transformed)

In [10]:
# Overall accuracy plus the per-class precision/recall/F1 text report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

In [11]:
# Show the evaluation results: accuracy on one line, then the report under its heading.
print("Accuracy:", accuracy)
print("Classification Report:", report, sep="\n")

Accuracy: 0.9820627802690582
Classification Report:
              precision    recall  f1-score   support

         ham       0.98      1.00      0.99       965
        spam       1.00      0.87      0.93       150

    accuracy                           0.98      1115
   macro avg       0.99      0.93      0.96      1115
weighted avg       0.98      0.98      0.98      1115



# Save the Model

In [13]:
# Make sure the output directory exists so this cell also works on a fresh checkout.
os.makedirs('model', exist_ok=True)

# Persist the fitted TF-IDF vectorizer alongside the classifier: the classifier
# was trained on the vectorizer's feature space, so raw text cannot be scored
# from model.pkl alone in a new session.
joblib.dump(vectorizer, 'model/vectorizer.pkl')

# Kept as the final expression (and at the original path) so existing consumers
# of model/model.pkl keep working and the cell still displays the written path.
joblib.dump(classifier, 'model/model.pkl')

['model/model.pkl']