loading datas and all :)

In [1]:
# --- BLOCK 1: SETUP & DATA LOADING ---
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

print("1. Downloading dataset...")
# Load the dataset directly from a URL (no manual upload needed)
url = "https://raw.githubusercontent.com/mohitgupta-omg/Kaggle-SMS-Spam-Collection-Dataset-/master/spam.csv"
df = pd.read_csv(url, encoding='latin-1')

# Clean the data (keep only relevant columns)
df = df[['v1', 'v2']]
df.columns = ['label', 'message']

# Show the first few rows so we know it worked
print("Data loaded successfully!")
print(f"Total messages: {len(df)}")
df.head()

1. Downloading dataset...
Data loaded successfully!
Total messages: 5572


Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


training..

In [3]:
# --- BLOCK 2: TRAINING THE MODEL ---
print("2. Training the AI...")

# Convert 'ham'/'spam' to 0 and 1 so the computer understands
df['label_num'] = df['label'].map({'ham': 0, 'spam': 1})

# Split data: 80% for training, 20% for testing
X_train, X_test, y_train, y_test = train_test_split(
    df['message'], df['label_num'], test_size=0.2, random_state=42
)

# Create the "Vectorizer" (converts words to numbers)
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train the Multinomial Naive Bayes model
model = MultinomialNB()
model.fit(X_train_vec, y_train)

# Calculate accuracy immediately
predictions = model.predict(X_test_vec)
accuracy = accuracy_score(y_test, predictions)

print(f"ðŸŽ‰ Model Trained! Accuracy: {accuracy * 100:.2f}%")

2. Training the AI...
ðŸŽ‰ Model Trained! Accuracy: 98.39%


saving model

In [4]:
# --- BLOCK 3: SAVING THE MODEL ---
import joblib

print("3. Saving model files...")

# Save the model and the vectorizer
joblib.dump(model, 'spam_model.pkl')
joblib.dump(vectorizer, 'vectorizer.pkl')

print("âœ… Model saved as 'spam_model.pkl'")
print("âœ… Vectorizer saved as 'vectorizer.pkl'")
print("Check the 'Files' tab on the left to see them!")

3. Saving model files...
âœ… Model saved as 'spam_model.pkl'
âœ… Vectorizer saved as 'vectorizer.pkl'
Check the 'Files' tab on the left to see them!


run this cutie :)

In [5]:
# --- BLOCK 4: LIVE TESTING ---
print("-" * 30)
print("ðŸ¤– SMS SPAM DETECTOR IS LIVE")
print("-" * 30)

def predict_message(text):
    # Transform the text using the SAME vectorizer we trained with
    text_vec = vectorizer.transform([text])
    prediction = model.predict(text_vec)
    return "ðŸš¨ SPAM" if prediction[0] == 1 else "âœ… HAM (Safe)"

while True:
    user_input = input("\nEnter a message (or type 'exit'): ")
    if user_input.lower() == 'exit':
        print("Goodbye!")
        break

    result = predict_message(user_input)
    print(f"Result: {result}")

------------------------------
ðŸ¤– SMS SPAM DETECTOR IS LIVE
------------------------------

Enter a message (or type 'exit'): sup gang
Result: âœ… HAM (Safe)

Enter a message (or type 'exit'): khd b igiagogfaf
Result: âœ… HAM (Safe)

Enter a message (or type 'exit'): URGENT! You have won a 1 week FREE membership in our Â£100,000 Prize Jackpot! Txt the word: CLAIM to No: 81010
Result: ðŸš¨ SPAM


KeyboardInterrupt: Interrupted by user