##**Loading the dataset**

In [0]:
import pandas as pd # library for data analysis
messages = pd.read_csv('/content/SMSSpamCollection', sep='\t',
                           names=["label", "message"])

##**Data cleaning and preprocessing**

In [0]:
import re # Library for String searching and manipulation
import nltk # Library for language processing
nltk.download('stopwords') # Download the required package

In [0]:
from nltk.corpus import stopwords # Library to remove stopwords
from nltk.stem.porter import PorterStemmer # Library to implement stemming
ps = PorterStemmer()
corpus = []
for i in range(0, len(messages)):
    review = re.sub('[^a-zA-Z]', ' ', messages['message'][i])
    review = review.lower()
    review = review.split()
    
    review = [ps.stem(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)

##**Creating the Bag of Words model**

In [0]:
from sklearn.feature_extraction.text import CountVectorizer #Library to convert a collection of text documents to a matrix of token counts
cv = CountVectorizer(max_features=2500)
X = cv.fit_transform(corpus).toarray()

y=pd.get_dummies(messages['label']) # Performing one-hot encoding onto to our target variables
y=y.iloc[:,1].values # Remove 1 column from the generated dummy output


##**Train Test Split**

In [0]:
from sklearn.model_selection import train_test_split # Library to split dataset into train and test 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42)

##**Training model using Naive bayes classifier**

In [0]:
from sklearn.naive_bayes import MultinomialNB # Library to implement Naive Bayes classifier
spam_detect_model = MultinomialNB().fit(X_train, y_train) # Training the model

y_pred=spam_detect_model.predict(X_test) # Testing the model


##**Create Confusion Matrix**

In [0]:
from sklearn.metrics import confusion_matrix # Library to create a confusion matrix to evaluate the accuracy of the model
conf_matrix = confusion_matrix(y_pred,y_test)

In [10]:
conf_matrix

array([[955,   7],
       [ 11, 142]])

##**Checking the accuracy of our model**

In [17]:
from sklearn.metrics import accuracy_score # Library to get accuracy classification score
accuracy = accuracy_score(y_pred,y_test)
print("Accuracy is {:.2f}%".format(accuracy*100))

Accuracy is 98.39%
