# Import the dataset

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the raw dataset from a path relative to the working directory, then
# preview the first rows as a quick sanity check of the parsed columns.
spam = pd.read_csv("datasets/spam.csv")
spam.head()

# Check the shape of the dataset

In [None]:
# (number of rows, number of columns) of the loaded frame.
spam.shape

# Check the columns present in the dataset

In [None]:
# Column labels; later cells select "Label" and "EmailText" from these.
spam.columns

# Check the descriptive statistics of the dataset

In [None]:
# Summary statistics for each column of the frame.
spam.describe()

# Check the info of the dataset

In [None]:
# Prints per-column dtype and non-null count, which reveals missing values.
spam.info()

In [None]:
# Class balance: how many rows carry each distinct value of "Label".
spam["Label"].value_counts()

In [None]:
# Bar chart with one bar per distinct "Label" value, showing its row count.
ax = sns.countplot(data=spam, x="Label")
ax.set_title("Amount of spam and no-spam messages")
plt.show()

# Plotting Pie-Chart

In [None]:
# Class frequencies, most frequent first. The Series method is used because
# the top-level pd.value_counts function is deprecated in recent pandas.
count_Class = spam["Label"].value_counts(sort=True)

# Data to plot. Wedge labels are derived from the counted classes themselves,
# so every slice is paired with its own count instead of assuming which class
# is the more frequent one. Sizes are read out as a plain list, which avoids
# integer-key lookups on a string-labelled Series.
labels = [str(label).capitalize() for label in count_Class.index]
sizes = count_Class.tolist()
colors = ["gold", "yellowgreen"]  # cycled by matplotlib if there are more wedges
explode = [0.1] * len(sizes)  # pull every slice slightly away from the centre

plt.pie(
    sizes,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct="%1.1f%%",  # print each share with one decimal place
    shadow=True,
    startangle=140,
)
plt.axis("equal")  # equal aspect ratio so the pie is drawn as a circle
plt.show()

# Extract the independent variable (the email text) into a Series X

In [None]:
# Model input: the "EmailText" column, selected as a single-column Series.
X = spam["EmailText"]
X.head()

# Extract the dependent variable (the label) into a Series y

In [None]:
# Prediction target: the "Label" column, selected as a single-column Series.
y = spam["Label"]
y.head()

# Split X and y into train and test dataset with test_size = 0.20, random_state=0

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the partition
# identical from run to run.
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.20,
)

# Check the shape of X and y of train dataset

In [None]:
# Training partition: the two shapes are printed on separate lines.
print(X_train.shape, y_train.shape, sep="\n")

# Check the shape of X and y of test dataset

In [None]:
# Held-out partition: the two shapes are printed on separate lines.
print(X_test.shape, y_test.shape, sep="\n")

# Applying various models of Machine Learning

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Turn the raw text into token-count matrices. The vectorizer is fitted on
# the training text only; the test text is merely transformed with that
# already-fitted vocabulary, so nothing is learned from the held-out rows.
cv = CountVectorizer()
trainCV = cv.fit_transform(X_train)
testCV = cv.transform(X_test)

In [None]:
from sklearn.naive_bayes import MultinomialNB

# fit() returns the estimator itself, so construction and training are
# chained; predictions are then made on the held-out count matrix.
naive_bayes = MultinomialNB().fit(trainCV, y_train)
pred_NB = naive_bayes.predict(testCV)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows whose Naive Bayes prediction matches the label.
Accuracy_Score_NB = accuracy_score(y_true=y_test, y_pred=pred_NB)
Accuracy_Score_NB

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours with library defaults, trained on the count matrix and
# then applied to the held-out matrix.
classifier_knn = KNeighborsClassifier().fit(trainCV, y_train)
pred_knn = classifier_knn.predict(testCV)

In [None]:
# Fraction of held-out rows whose k-NN prediction matches the label.
Accuracy_Score_knn = accuracy_score(y_true=y_test, y_pred=pred_knn)
Accuracy_Score_knn

In [None]:
from sklearn.svm import SVC

# Support vector classifier with a linear kernel, trained and then applied to
# the held-out count matrix.
classifier_svm_linear = SVC(kernel="linear").fit(trainCV, y_train)
pred_svm_linear = classifier_svm_linear.predict(testCV)

In [None]:
# Fraction of held-out rows the linear-kernel SVM classifies correctly.
Accuracy_Score_SVM_Linear = accuracy_score(y_true=y_test, y_pred=pred_svm_linear)
Accuracy_Score_SVM_Linear

In [None]:
# Support vector classifier with the RBF (Gaussian) kernel.
classifier_svm_rbf = SVC(kernel="rbf").fit(trainCV, y_train)
pred_svm_rbf = classifier_svm_rbf.predict(testCV)

In [None]:
# Fraction of held-out rows the RBF-kernel SVM classifies correctly.
Accuracy_Score_SVM_Gaussion = accuracy_score(y_true=y_test, y_pred=pred_svm_rbf)
Accuracy_Score_SVM_Gaussion

In [None]:
# Support vector classifier with a polynomial kernel (library-default degree).
classifier_svm_poly = SVC(kernel="poly").fit(trainCV, y_train)
pred_svm_poly = classifier_svm_poly.predict(testCV)

In [None]:
# Fraction of held-out rows the polynomial-kernel SVM classifies correctly.
Accuracy_Score_SVM_Polynomial = accuracy_score(y_true=y_test, y_pred=pred_svm_poly)
Accuracy_Score_SVM_Polynomial

In [None]:
# Support vector classifier with a sigmoid kernel.
classifier_svm_sigmoid = SVC(kernel="sigmoid").fit(trainCV, y_train)
pred_svm_sigmoid = classifier_svm_sigmoid.predict(testCV)

In [None]:
# Fraction of held-out rows the sigmoid-kernel SVM classifies correctly.
Accuracy_Score_svm_Sigmoid = accuracy_score(y_true=y_test, y_pred=pred_svm_sigmoid)
Accuracy_Score_svm_Sigmoid

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Tree construction involves randomness (features are permuted at each split),
# so an unseeded tree can report a different accuracy on every run. Seeding it
# with the same value used for the train/test split makes the score
# reproducible.
classifier_dt = DecisionTreeClassifier(random_state=0)
classifier_dt.fit(trainCV, y_train)
pred_dt = classifier_dt.predict(testCV)

In [None]:
# Fraction of held-out rows the decision tree classifies correctly.
Accuracy_Score_dt = accuracy_score(y_true=y_test, y_pred=pred_dt)
Accuracy_Score_dt

In [None]:
from sklearn.ensemble import RandomForestClassifier

# A random forest bootstraps rows and subsamples features at random, so an
# unseeded forest reports a different accuracy on every run. Seeding it with
# the same value used for the train/test split makes the score reproducible.
classifier_rf = RandomForestClassifier(random_state=0)
classifier_rf.fit(trainCV, y_train)
pred_rf = classifier_rf.predict(testCV)

In [None]:
# Fraction of held-out rows the random forest classifies correctly.
Accuracy_Score_rf = accuracy_score(y_true=y_test, y_pred=pred_rf)
Accuracy_Score_rf

In [None]:
# Summary of the held-out accuracy of every classifier, one line per model.
# The printed label for the RBF kernel is spelled "Gaussian"; the variable
# keeps its existing name because it is assigned in an earlier cell.
accuracy_by_model = {
    "K-Nearest Neighbors": Accuracy_Score_knn,
    "Naive Bayes": Accuracy_Score_NB,
    "Support Vector Machine Linear": Accuracy_Score_SVM_Linear,
    "Support Vector Machine Gaussian": Accuracy_Score_SVM_Gaussion,
    "Support Vector Machine Polynomial": Accuracy_Score_SVM_Polynomial,
    "Support Vector Machine Sigmoid": Accuracy_Score_svm_Sigmoid,
    "Decision Tree": Accuracy_Score_dt,
    "Random Forest": Accuracy_Score_rf,
}
for model_name, score in accuracy_by_model.items():
    print(model_name, "=", score)