# **SPAM CLASSIFIER**

### **Loading the Dataset**

In [None]:
import csv

import pandas as pd

# Location of the tab-separated SMS Spam Collection file (label<TAB>message).
DATA_PATH = '/content/drive/MyDrive/smsspamcollection/SMSSpamCollection'

# The file has no header row, so column names are supplied explicitly.
# quoting=csv.QUOTE_NONE makes the parser treat double quotes inside the SMS
# text as ordinary characters; with the default quote handling, a message
# that starts with an unbalanced '"' can swallow the following lines into a
# single field, silently dropping rows.
messages = pd.read_csv(
    DATA_PATH,
    sep='\t',
    names=["label", "message"],
    quoting=csv.QUOTE_NONE,
)

In [None]:
# Show the loaded frame (columns: label, message) as the cell output.
messages

In [None]:
# Peek at the raw text of the row labelled 2.
messages.loc[2, 'message']

In [None]:
# Label that goes with the message shown above (row labelled 2).
messages.loc[2, 'label']

In [None]:
# Count missing values in each column.
messages.isna().sum()

### **Text Preprocessing**

In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

wordnet = WordNetLemmatizer()

# Build the English stop-word lookup once. It was previously evaluated for
# every single token, and membership was tested against a list; a set gives
# the same filtering result with constant-time lookups.
english_stopwords = set(stopwords.words('english'))

# Cleaned messages, one string per row of `messages`, in row order.
corpus = []

# Iterate over the column values directly so the loop does not depend on the
# frame carrying a default 0..n-1 index.
for raw_message in messages['message']:
    # Replace everything except ASCII letters with a space, so digits and
    # punctuation act as token separators.
    letters_only = re.sub('[^a-zA-Z]', ' ', raw_message)
    tokens = letters_only.lower().split()
    # Drop stop words, then lemmatize the remaining tokens (no POS tag is
    # passed, so the lemmatizer's default is used).
    lemmas = [
        wordnet.lemmatize(token)
        for token in tokens
        if token not in english_stopwords
    ]
    corpus.append(' '.join(lemmas))

### **Feature Extraction**

**Bag of Words**

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words features with max_features=2500.
# NOTE(review): the vectorizer is fitted on the whole corpus before the
# train/test split, so held-out messages influence the vocabulary.
cv = CountVectorizer(max_features=2500)
bow_matrix = cv.fit_transform(corpus)

# Convert the document-term matrix to a dense array before printing it.
X = bow_matrix.toarray()
print(X)

In [None]:
# Inspect the label column before encoding it.
messages['label']

In [None]:
# One-hot encode the label column: one indicator column per distinct label.
Y = pd.get_dummies(messages['label'])

In [None]:
# Show the current value of Y as the cell output.
Y

In [None]:
# Build the target vector from `messages` rather than from `Y` itself, so the
# cell can be re-run safely: after one execution `Y` is a NumPy array with no
# `.iloc`, and the previous self-referential form would raise.
# Column 1 of the indicator frame is used as the target; presumably this is
# the 'spam' column (get_dummies orders columns by sorted label). Verify.
Y = pd.get_dummies(messages['label']).iloc[:, 1].values

In [None]:
# Print the current value of Y.
print(Y)

### **Modeling**

**Multinomial Naive Bayes (Bag of Words features)**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable across runs.
bow_split = train_test_split(X, Y, test_size=0.20, random_state=0)
X_train, X_test, Y_train, Y_test = bow_split

In [None]:
from sklearn.naive_bayes import MultinomialNB

# Create the classifier, then fit it on the training split (fit returns the
# fitted estimator itself).
nb_classifier = MultinomialNB()
spam_detect_model = nb_classifier.fit(X_train, Y_train)

In [None]:
# Predict labels for the held-out rows with the fitted model.
Y_pred = spam_detect_model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out messages whose predicted label matches the true label.
score = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print(score)

In [None]:
from sklearn.metrics import classification_report

# classification_report expects (y_true, y_pred). The arguments were passed in
# the opposite order, which swaps precision with recall and reports support
# counts for the predictions instead of the ground truth.
print(classification_report(y_true=Y_test, y_pred=Y_pred))

### **Feature Extraction**

**TF-IDF**

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features with max_features=2500. This replaces the feature matrix X
# assigned earlier in the notebook.
# NOTE(review): the vectorizer is fitted on the whole corpus before the
# train/test split, so held-out messages influence the fitted vectorizer.
tv = TfidfVectorizer(max_features=2500)
tfidf_matrix = tv.fit_transform(corpus)
X = tfidf_matrix.toarray()

### **Modeling**

**Multinomial Naive Bayes (TF-IDF features)**

In [None]:
from sklearn.model_selection import train_test_split

# Re-split now that X holds new features; same 20% hold-out and seed as the
# other splits in this notebook.
tfidf_split = train_test_split(X, Y, test_size=0.20, random_state=0)
X_train, X_test, Y_train, Y_test = tfidf_split

In [None]:
from sklearn.naive_bayes import MultinomialNB

# Fresh Naive Bayes classifier fitted on the current training split.
nb_classifier = MultinomialNB()
spam_detect_model = nb_classifier.fit(X_train, Y_train)

In [None]:
# Predict labels for the held-out rows with the fitted model.
Y_pred = spam_detect_model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out messages whose predicted label matches the true label.
score = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print(score)

In [None]:
from sklearn.metrics import classification_report

# classification_report expects (y_true, y_pred). The arguments were passed in
# the opposite order, which swaps precision with recall and reports support
# counts for the predictions instead of the ground truth.
print(classification_report(y_true=Y_test, y_pred=Y_pred))

**Random Forest Classifier (TF-IDF features)**

In [None]:
from sklearn.model_selection import train_test_split

# 20% hold-out with a fixed seed, matching the other splits in this notebook.
rf_split = train_test_split(X, Y, test_size=0.20, random_state=0)
X_train, X_test, Y_train, Y_test = rf_split

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so that Restart & Run All reproduces the same model and
# metrics; without random_state every run trains a different ensemble.
# The seed value 0 matches the one used for the train/test split.
spam_detect_model = RandomForestClassifier(random_state=0).fit(X_train, Y_train)

In [None]:
# Predict labels for the held-out rows with the fitted model.
Y_pred = spam_detect_model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out messages whose predicted label matches the true label.
score = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print(score)

In [None]:
from sklearn.metrics import classification_report

# classification_report expects (y_true, y_pred). The arguments were passed in
# the opposite order, which swaps precision with recall and reports support
# counts for the predictions instead of the ground truth.
print(classification_report(y_true=Y_test, y_pred=Y_pred))