<a href="https://colab.research.google.com/github/SuryaReddy1925/Bharat-Intern/blob/main/SMSClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.pipeline import Pipeline

# Fetch the NLTK stop-word corpus (needed by stopwords.words('english') below)
import nltk
nltk.download('stopwords')

# Read the raw SMS Spam Collection CSV, decoding it as Latin-1
sms_data = pd.read_csv(
    'spam.csv',
    encoding='latin-1',
)

# Peek at the leading rows to see which columns the raw file carries
print(sms_data.head(n=5))

# Keep only the label (v1) and message (v2) columns under descriptive names.
# An explicit .copy() makes the result an independent frame, so the column
# assignment below never writes into a slice of the raw data
# (avoids pandas' SettingWithCopyWarning).
sms_data = sms_data[['v1', 'v2']].copy()
sms_data.columns = ['label', 'text']

# Convert labels to binary values (0 for 'ham', 1 for 'spam').
# Series.map turns any value missing from the mapping into NaN, so check for
# that here and fail with a clear message instead of letting NaN targets
# reach the classifier.
label_codes = {'ham': 0, 'spam': 1}
encoded_labels = sms_data['label'].map(label_codes)
unmapped = encoded_labels.isna()
if unmapped.any():
    unexpected = sorted(sms_data.loc[unmapped, 'label'].astype(str).unique())
    raise ValueError(f"Unexpected label values in 'v1': {unexpected}")
sms_data['label'] = encoded_labels

# Features are the raw message strings; the target is the 0/1 label column
X, y = sms_data['text'], sms_data['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Two-step pipeline: bag-of-words counts feeding a multinomial naive Bayes model.
# The vectorizer drops NLTK's English stop words and limits the vocabulary
# to at most 5000 features.
text_clf = Pipeline(steps=[
    (
        'vectorizer',
        CountVectorizer(
            analyzer='word',
            stop_words=stopwords.words('english'),
            max_features=5000,
        ),
    ),
    ('classifier', MultinomialNB()),
])

# Fit the vectorizer and the classifier on the training messages
text_clf.fit(X_train, y_train)

# Predict labels for the held-out messages
y_pred = text_clf.predict(X_test)

# Score the predictions: overall accuracy plus the per-class text report
accuracy, report = (
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
)

# Print the accuracy (two decimals) followed by the report under its heading
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:", report, sep="\n")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


     v1                                                 v2 Unnamed: 2  \
0   ham  Go until jurong point, crazy.. Available only ...        NaN   
1   ham                      Ok lar... Joking wif u oni...        NaN   
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...        NaN   
3   ham  U dun say so early hor... U c already then say...        NaN   
4   ham  Nah I don't think he goes to usf, he lives aro...        NaN   

  Unnamed: 3 Unnamed: 4  
0        NaN        NaN  
1        NaN        NaN  
2        NaN        NaN  
3        NaN        NaN  
4        NaN        NaN  
Accuracy: 0.98

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       965
           1       0.94      0.91      0.93       150

    accuracy                           0.98      1115
   macro avg       0.97      0.95      0.96      1115
weighted avg       0.98      0.98      0.98      1115

