<a href="https://colab.research.google.com/github/HillaryDrugs/li7/blob/main/Naive_Bayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import warnings
warnings.filterwarnings("ignore")  # hide pandas/sklearn warnings

import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

RANDOM_SAMPLES = 3          # how many random messages to show each run
CSV_PATH = "spam.csv"       # dataset path (v1=label, v2=text)

# --- Load dataset ---
# Read the CSV with the tolerant python engine, skipping malformed lines, then
# keep only the label/text columns and discard rows missing either value.
df = pd.read_csv(CSV_PATH, encoding="latin-1", engine="python", on_bad_lines="skip")
df = df.rename(columns={"v1": "label", "v2": "text"})
df = df[["label", "text"]].dropna()

# Map labels: ham -> 0, spam -> 1 (after trimming whitespace and lowercasing).
df["label"] = df["label"].astype(str).str.strip().str.lower().map({"ham": 0, "spam": 1})

# Series.map yields NaN for any label that is neither "ham" nor "spam". Drop
# those rows so NaN never reaches the stratified split or the classifier, and
# cast back to int because a column that ever held NaN is float.
df = df.dropna(subset=["label"])
df["label"] = df["label"].astype(int)

# Features are the message text (forced to str); targets are the 0/1 labels.
X = df["text"].astype(str)
y = df["label"]

# --- Train/test split ---
# 20% of the rows are held out for testing; the split is seeded and stratified
# on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# --- Build & train model ---
# Token counts -> TF-IDF weighting -> multinomial Naive Bayes classifier.
pipeline_steps = [
    ("vect", CountVectorizer()),
    ("tfidf", TfidfTransformer()),
    ("clf", MultinomialNB()),
]
model = Pipeline(steps=pipeline_steps)
model.fit(X_train, y_train)

# --- Accuracy ---
# Score on the held-out test split and report it as a percentage.
accuracy = model.score(X_test, y_test)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("-" * 52)

# --- Show up to RANDOM_SAMPLES random messages from dataset with predictions ---
# Cap the request at the dataset size: DataFrame.sample raises ValueError when
# asked for more rows than exist (sampling is without replacement).
sample_df = df.sample(min(RANDOM_SAMPLES, len(df)))

# Cast to str, matching how the training text was prepared, so the vectorizer
# never receives a non-string value.
sample_texts = sample_df["text"].astype(str).tolist()

# Classify the whole sample in a single call; skip the call for an empty sample.
sample_preds = model.predict(sample_texts) if sample_texts else []

for txt, pred in zip(sample_texts, sample_preds):
    label = "SPAM" if pred == 1 else "HAM"
    print("Random Message:")
    print(txt)
    print(f"Prediction: {label}")
    print("-" * 52)

# --- Interactive user input classification ---
# Keep prompting until the user submits an empty line. Only the input() call
# is guarded, so errors raised by the model itself are never masked.
while True:
    try:
        user_text = input("\nType a message to classify (Enter to quit): ").strip()
    except (EOFError, NotImplementedError):
        # EOFError: stdin is closed or exhausted (e.g. piped/non-interactive run).
        # NotImplementedError: IPython kernels without a stdin channel raise
        # StdinNotImplementedError, which subclasses NotImplementedError.
        break
    if not user_text:
        print("Bye!")
        break
    pred = model.predict([user_text])[0]
    label = "SPAM" if pred == 1 else "HAM"
    print(f"Prediction: {label}")
