In [1]:
!pip install pandas scikit-learn




In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [4]:
df = pd.read_csv('spam.csv', encoding='latin-1')
df = df[['v1', 'v2']]  # Select relevant columns
df.columns = ['label', 'text']  # Rename columns

In [5]:
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

In [6]:
tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
X = tfidf.fit_transform(df['text'])
y = df['label']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
model = MultinomialNB()
model.fit(X_train, y_train)


In [9]:
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.968609865470852
Confusion Matrix:
 [[965   0]
 [ 35 115]]
Classification Report:
               precision    recall  f1-score   support

           0       0.96      1.00      0.98       965
           1       1.00      0.77      0.87       150

    accuracy                           0.97      1115
   macro avg       0.98      0.88      0.93      1115
weighted avg       0.97      0.97      0.97      1115



In [10]:
def predict_spam(text):
    text_transformed = tfidf.transform([text])
    prediction = model.predict(text_transformed)
    return 'Spam' if prediction[0] == 1 else 'Ham'

In [11]:
input_text = input("Please enter the text you want to classify: ")
result = predict_spam(input_text)
print("The entered text is classified as:", result)

Please enter the text you want to classify: Congratulations! You've won a $1,000 Walmart gift card. Go to http://bit.ly/123456 to claim now.
The entered text is classified as: Spam
