<a href="https://colab.research.google.com/github/AkankshaB0105/CODSOFT/blob/main/SPAM_SMS_DETECTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive into the Colab filesystem; the dataset is read from this mount point.
drive.mount(mountpoint='/content/drive/')

Mounted at /content/drive/


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.svm import SVC
import joblib


In [None]:
#LOAD THE SMS DATASET
# The CSV is decoded as latin-1, as requested through the encoding argument.
df = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/codsoftdata/SPAM SMS DETECTION/spam.csv',
    encoding='latin-1',
)

In [None]:
# Plain-text preview of the first five rows of the raw frame.
print(df.head(n=5))

     v1                                                 v2 Unnamed: 2  \
0   ham  Go until jurong point, crazy.. Available only ...        NaN   
1   ham                      Ok lar... Joking wif u oni...        NaN   
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...        NaN   
3   ham  U dun say so early hor... U c already then say...        NaN   
4   ham  Nah I don't think he goes to usf, he lives aro...        NaN   

  Unnamed: 3 Unnamed: 4  
0        NaN        NaN  
1        NaN        NaN  
2        NaN        NaN  
3        NaN        NaN  
4        NaN        NaN  


In [None]:
# List the column labels of the loaded frame (v1 and v2 are the ones selected below).
print(df.columns)

Index(['v1', 'v2', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], dtype='object')


In [None]:
#PREPROCESS THE DATA
# x holds the message text column (v2); y holds the ham/spam label column (v1).
x, y = df['v2'], df['v1']

In [None]:
#SPLIT THE DATA INTO TRAINING AND TESTING SETS
# 20% of the rows are held out for testing; random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Show the heading and the first five training messages, one per line.
print("train dataset: ", x_train.iloc[:5], sep="\n")

train dataset: 
1978    No I'm in the same boat. Still here at my moms...
3989    (Bank of Granite issues Strong-Buy) EXPLOSIVE ...
3935       They r giving a second chance to rahul dengra.
4078       O i played smash bros  &lt;#&gt;  religiously.
4086    PRIVATE! Your 2003 Account Statement for 07973...
Name: v2, dtype: object


In [None]:
# Show the heading and the first five held-out messages, one per line.
print("test dataset: ", x_test.iloc[:5], sep="\n")

test dataset: 
3245    Funny fact Nobody teaches volcanoes 2 erupt, t...
944     I sent my scores to sophas and i had to do sec...
1044    We know someone who you know that fancies you....
2484    Only if you promise your getting out as SOON a...
812     Congratulations ur awarded either å£500 of CD ...
Name: v2, dtype: object


In [None]:
#TF-IDF VECTORIZATION
# The vectorizer is fitted on the training messages only; the same fitted
# instance is then used to encode the held-out messages.
tfidf_vectorizer = TfidfVectorizer(
    max_features=5000,
    stop_words='english',
)
X_train_tfidf = tfidf_vectorizer.fit_transform(x_train)
X_test_tfidf = tfidf_vectorizer.transform(x_test)

In [None]:
#TRAIN NAIVE BAYES CLASSIFIER
# Default-configured multinomial Naive Bayes, fitted on the TF-IDF training matrix.
nb_classifier = MultinomialNB()
nb_classifier.fit(X=X_train_tfidf, y=y_train)

In [None]:
#PREDICTIONS AND EVALUATIONS FOR NAIVE BAYES
nb_predictions = nb_classifier.predict(X_test_tfidf)

# Accuracy is computed once and reused for both the fraction and the percentage line.
nb_accuracy = accuracy_score(y_test, nb_predictions)
print("Naive bayes accuracy:", nb_accuracy)
print("ACCURACY FOR NAIVE BAYES IN%: ", nb_accuracy * 100, "%")

# Per-class precision/recall/F1 followed by the raw confusion counts.
nb_report = classification_report(y_test, nb_predictions)
print("naive bayes classification report: ")
print(nb_report)
nb_confusion = confusion_matrix(y_test, nb_predictions)
print("navie bayes confusion matrix: ")
print(nb_confusion)

Naive bayes accuracy: 0.9721973094170404
ACCURACY FOR NAIVE BAYES IN%:  97.21973094170404 %
naive bayes classification report: 
              precision    recall  f1-score   support

         ham       0.97      1.00      0.98       965
        spam       1.00      0.79      0.88       150

    accuracy                           0.97      1115
   macro avg       0.98      0.90      0.93      1115
weighted avg       0.97      0.97      0.97      1115

navie bayes confusion matrix: 
[[965   0]
 [ 31 119]]


In [None]:
#TRAIN LOGISTIC REGRESSION CLASSIFIER
# Default-configured logistic regression, fitted on the TF-IDF training matrix.
lr_classifier = LogisticRegression()
lr_classifier.fit(X=X_train_tfidf, y=y_train)

In [None]:
#PREDICTION AND EVALUATION FOR LOGISTIC REGRESSION
lr_predictions = lr_classifier.predict(X_test_tfidf)

# Accuracy is computed once and reused for both the fraction and the percentage line.
lr_accuracy = accuracy_score(y_test, lr_predictions)
print("\n logistic regression accuracy:", lr_accuracy)
print("ACCURACY FOR  LOGISTIC REGRESSION IN%: ", lr_accuracy * 100, "%")

# Per-class precision/recall/F1 followed by the raw confusion counts.
lr_report = classification_report(y_test, lr_predictions)
print("LOGISTIC REGRESSION classification report: ")
print(lr_report)
lr_confusion = confusion_matrix(y_test, lr_predictions)
print("LOGISTIC REGRESSION confusion matrix: ")
print(lr_confusion)


 logistic regression accuracy: 0.957847533632287
ACCURACY FOR  LOGISTIC REGRESSION IN%:  95.7847533632287 %
LOGISTIC REGRESSION classification report: 
              precision    recall  f1-score   support

         ham       0.96      1.00      0.98       965
        spam       0.97      0.71      0.82       150

    accuracy                           0.96      1115
   macro avg       0.96      0.85      0.90      1115
weighted avg       0.96      0.96      0.95      1115

LOGISTIC REGRESSION confusion matrix: 
[[962   3]
 [ 44 106]]


In [None]:
#TRAIN SUPPORT VECTOR MACHINE (SVM) CLASSIFIER
# Default-configured SVC, fitted on the TF-IDF training matrix.
svm_classifier = SVC()
svm_classifier.fit(X=X_train_tfidf, y=y_train)

In [None]:
#PREDICTIONS AND EVALUATIONS FOR SVM
# Predict with the SVM itself. This cell used to call lr_classifier.predict,
# so every figure printed under the "SVM" headings was really the logistic
# regression result. Re-run the cell to obtain the actual SVM scores.
svm_predictions = svm_classifier.predict(X_test_tfidf)

# Accuracy is computed once and reused for both the fraction and the percentage line.
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("\n SVM accuracy:" , svm_accuracy)
print("ACCURACY FOR  SVM IN%: ", svm_accuracy*100,"%")

# Per-class precision/recall/F1 followed by the raw confusion counts.
print("SVM classification report: ")
print(classification_report(y_test, svm_predictions))
print("SVM confusion matrix: ")
print(confusion_matrix(y_test, svm_predictions))



 SVM accuracy: 0.957847533632287
ACCURACY FOR  SVM IN%:  95.7847533632287 %
SVM classification report: 
              precision    recall  f1-score   support

         ham       0.96      1.00      0.98       965
        spam       0.97      0.71      0.82       150

    accuracy                           0.96      1115
   macro avg       0.96      0.85      0.90      1115
weighted avg       0.96      0.96      0.95      1115

SVM confusion matrix: 
[[962   3]
 [ 44 106]]


In [None]:
# Persist the fitted TF-IDF vectorizer alongside the classifier: the SVM was
# trained on this vectorizer's features, so raw text cannot be encoded for the
# saved model without it.
joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer.joblib')
# Kept as the last expression so the cell still displays ['svm_model.joblib'].
joblib.dump(svm_classifier,'svm_model.joblib')

['svm_model.joblib']

In [None]:
def predict_spam_or_legitimate(message):
  """Classify a single SMS message with the trained SVM model.

  The message is encoded with the notebook-level ``tfidf_vectorizer`` (already
  fitted on the training messages) and passed to the notebook-level
  ``svm_classifier``.

  Args:
    message: Raw text of one SMS message.

  Returns:
    The label predicted for the message, i.e. the first (and only) element of
    ``svm_classifier.predict`` -- one of the labels found in the training
    column ``v1`` (the evaluation reports show ``ham`` and ``spam``).
  """
  # The vectorizer expects an iterable of documents, so wrap the single message.
  message_tfidf = tfidf_vectorizer.transform([message])

  # predict returns one label per input row; exactly one row was supplied.
  return svm_classifier.predict(message_tfidf)[0]

In [None]:
# Read one message from the user and print the label the SVM assigns to it.
user_input = input("ENTER A SMS MEASSAGE: ")
svm_result = predict_spam_or_legitimate(message=user_input)
print(f"SVM PREDICTED RESULT: {svm_result}")

ENTER A SMS MEASSAGE: i will call you back
SVM PREDICTED RESULT: ham
