# SPAM SMS DETECTION

### Import Libraries

In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score, r2_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the SMS dataset, keeping only the two columns used below (v1 and v2).
df = pd.read_csv(
    "E:/Encryptix/Spam_SMS_Detection/Dataset/spam.csv",
    usecols=["v1", "v2"],
    encoding="latin-1",
    engine="python",
)

In [None]:
# Preview the first five rows of the freshly loaded data.
df.head(n=5)

In [None]:
# Preview the last five rows of the freshly loaded data.
df.tail(n=5)

In [None]:
# (number of rows, number of columns) of the loaded DataFrame.
df.shape

In [None]:
# Total number of cells in the DataFrame (rows multiplied by columns).
df.size

In [None]:
# Print the column names, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Summary statistics for the two loaded columns, before any cleaning.
df.describe()

In [None]:
# Give the two loaded columns descriptive names (positional: first -> label, second -> message).
df.columns = pd.Index(["label", "message"])

In [None]:
# Confirm the new column names by previewing the first five rows.
df.head(n=5)

In [None]:
# True if at least one cell anywhere in the DataFrame is missing.
df.isna().to_numpy().any()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# True if at least one row is an exact repeat of an earlier row.
df.duplicated(keep="first").to_numpy().any()

In [None]:
# Number of rows that repeat an earlier row (the first occurrence is not counted).
df.duplicated(keep="first").sum()

In [None]:
# Remove repeated rows in place, keeping the first occurrence of each.
df.drop_duplicates(keep="first", inplace=True)

In [None]:
# Summary statistics again, now that duplicate rows have been dropped.
df.describe()

In [None]:
# Preview the first five rows after de-duplication.
df.head(n=5)

### Label Distribution Visualization

In [None]:
# Histogram of how many messages carry each label.
# No KDE curve is drawn: the label is a categorical (not yet numerically encoded)
# variable, and a kernel density estimate is only meaningful for continuous data.
sns.displot(df["label"], color="red")

In [None]:
# Same per-label message count, drawn in pink.
sns.displot(df["label"], color="pink")

### Use LabelEncoder to encode the categorical variable (label) as numeric values

In [None]:
# Replace the textual labels with integer class codes.
encoder = LabelEncoder()
raw_labels = df["label"].to_numpy()
# Fit on the raw labels, then map those same labels to their integer codes.
df["label"] = encoder.fit(raw_labels).transform(raw_labels)

In [None]:
# Check the encoded label column by previewing the first five rows.
df.head(n=5)

### TF-IDF Vectorization

In [None]:
# TF-IDF vectorizer created with scikit-learn's default settings (no arguments overridden).
vectorizer = TfidfVectorizer()

In [None]:
# Learn the vocabulary and IDF weights from every message and build the TF-IDF matrix.
X = vectorizer.fit_transform(df["message"])
# Preview only the first few rows as a dense array. Densifying the entire sparse
# matrix (messages x vocabulary terms) just to display it can exhaust memory.
X[:5].toarray()

In [None]:
# Target vector: the integer-encoded label column.
y = df["label"]

In [None]:
# Dimensions of the TF-IDF matrix: one row per message, one column per vocabulary term.
X.shape

In [None]:
# Length of the target vector; its row count should match X's.
y.shape

In [None]:
# Number of non-null entries in each column.
df.count()

### Pie chart of ham vs. spam record counts

In [None]:
# Count the messages in each encoded class: p for label 0, k for label 1.
p = int((df["label"] == 0).sum())
k = int((df["label"] == 1).sum())

In [None]:
# Draw a 5x5-inch pie chart of the two class counts, annotated with one-decimal percentages.
values = np.array([p, k])
label = np.array(["Ham", "Spam"])
plt.figure(figsize=(5, 5))
plt.pie(x=values, labels=label, autopct="%.1f%%")
plt.show()

### Split the data into training and test sets

In [None]:
# Split the raw messages first, then fit TF-IDF on the training portion only.
# Fitting the vectorizer on the full corpus before splitting leaks test-set
# vocabulary and IDF statistics into training and inflates the reported scores.
# The same test_size/random_state are kept, so the same rows land in each split.
messages_train, messages_test, y_train, y_test = train_test_split(
    df["message"], y, test_size=0.2, random_state=42
)
# Rebinds `vectorizer` to an instance fitted on the training messages only.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(messages_train)
# The test messages are only transformed, using the training vocabulary and IDF weights.
X_test = vectorizer.transform(messages_test)

### Train Naive Bayes Classifier

In [None]:
# Multinomial Naive Bayes classifier with scikit-learn's default hyperparameters.
naive_bayes_model = MultinomialNB()

In [None]:
# Train the Naive Bayes classifier on the training split.
naive_bayes_model.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for the held-out test messages.
nb_predictions = naive_bayes_model.predict(X=X_test)

In [None]:
# Report the Naive Bayes classifier's performance on the test split.
# R^2 is intentionally not reported: it is a regression metric and is not
# meaningful for predicted class labels.
print("Naive Bayes Model:")
print(confusion_matrix(y_test, nb_predictions))
print(classification_report(y_test, nb_predictions))
print("Accuracy: ", accuracy_score(y_test, nb_predictions))
# Precision, recall and F1 use scikit-learn's default positive class (label 1).
print("Precision_score: ", precision_score(y_test, nb_predictions))
print("Recall_score: ", recall_score(y_test, nb_predictions))
print("f1_score: ", f1_score(y_test, nb_predictions))

### Train Logistic Regression Model

In [None]:
# Logistic regression classifier with scikit-learn's default hyperparameters.
logistic_regression_model = LogisticRegression()

In [None]:
# Train the logistic regression classifier on the training split.
logistic_regression_model.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for the held-out test messages.
lr_predictions = logistic_regression_model.predict(X=X_test)

In [None]:
# Report the logistic regression classifier's performance on the test split.
# R^2 is intentionally not reported: it is a regression metric and is not
# meaningful for predicted class labels.
print("Logistic Regression Model:")
print(confusion_matrix(y_test, lr_predictions))
print(classification_report(y_test, lr_predictions))
print("Accuracy: ", accuracy_score(y_test, lr_predictions))
# Precision, recall and F1 use scikit-learn's default positive class (label 1).
print("Precision_score: ", precision_score(y_test, lr_predictions))
print("Recall_score: ", recall_score(y_test, lr_predictions))
print("f1_score: ", f1_score(y_test, lr_predictions))

### Train Support Vector Machine (SVM) Model

In [None]:
# Support vector classifier using a linear kernel; all other settings are defaults.
svm_model = SVC(kernel='linear')

In [None]:
# Train the linear-kernel SVM on the training split.
svm_model.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for the held-out test messages.
svm_predictions = svm_model.predict(X=X_test)

In [None]:
# Report the SVM classifier's performance on the test split.
# R^2 is intentionally not reported: it is a regression metric and is not
# meaningful for predicted class labels.
print("Support Vector Machine (SVM) Model:")
print(confusion_matrix(y_test, svm_predictions))
print(classification_report(y_test, svm_predictions))
print("Accuracy: ", accuracy_score(y_test, svm_predictions))
# Precision, recall and F1 use scikit-learn's default positive class (label 1).
print("Precision_score: ", precision_score(y_test, svm_predictions))
print("Recall_score: ", recall_score(y_test, svm_predictions))
print("f1_score: ", f1_score(y_test, svm_predictions))