In [None]:
import pandas as pd
import re
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the raw dataset. This cell reads its "Label" column, which is expected
# to hold the strings 'spam' or 'ham'.
file_path = "/content/text_dataset.csv"  # Update with your file path
df = pd.read_csv(file_path)

# Label encoding: 'spam' -> 1, 'ham' -> 0
label_mapping = {"spam": 1, "ham": 0}
encoded_labels = df["Label"].map(label_mapping)

# Series.map turns every value that is missing from the mapping into NaN.
# Fail here with a clear message instead of writing NaN labels to disk.
unmapped_mask = encoded_labels.isna()
if unmapped_mask.any():
    unexpected_values = df.loc[unmapped_mask, "Label"].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'Label' column (expected 'spam' or 'ham'): {unexpected_values}"
    )
df["Label"] = encoded_labels

# Save the preprocessed dataset; the message reports the path actually written
output_path = "text_preprocessed_dataset.csv"
df.to_csv(output_path, index=False)
print(f"Preprocessed dataset saved as {output_path}")



Preprocessed dataset saved as preprocessed_dataset.csv


In [None]:
# Split data into training and testing sets (80% train / 20% test, fixed seed)
X = df["Message"]
y = df["Label"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert text data to numerical features using TF-IDF.
# The vectorizer is fitted on the training messages only; the test messages
# are transformed with that same fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train RandomForest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_tfidf, y_train)

# Predict on test data
y_pred = model.predict(X_test_tfidf)

# Evaluate the predictions on the held-out split so the quality of the model
# is visible before it is saved and reused.
# NOTE(review): if the classes are imbalanced, accuracy alone can look
# optimistic - consider also reporting precision/recall.
accuracy = accuracy_score(y_test, y_pred)
print(f"Test accuracy: {accuracy:.4f}")

In [None]:
import pickle

# Bundle the fitted classifier and its TF-IDF vectorizer into one dict and
# write that dict to a single pickle file.
bundle = {"model": model, "vectorizer": vectorizer}
with open("spam_classifier_combined.pkl", "wb") as out_file:
    pickle.dump(bundle, out_file)

print("Model and vectorizer saved together in spam_classifier_combined.pkl")


Model and vectorizer saved together in spam_classifier_combined.pkl


In [None]:
import pickle

# Restore the classifier and the vectorizer from the combined pickle file.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open("spam_classifier_combined.pkl", "rb") as in_file:
    saved_bundle = pickle.load(in_file)

model = saved_bundle["model"]
vectorizer = saved_bundle["vectorizer"]

# One example message to classify (kept in a list, as passed to transform)
sample_text = ["Congratulations! You won a free lottery ticket. Call now to claim your prize!"]

# Vectorize the message with the restored vectorizer, then classify it
sample_text_tfidf = vectorizer.transform(sample_text)
prediction = model.predict(sample_text_tfidf)

# Report the outcome: class 1 is shown as "Spam", anything else as "Not Spam"
if prediction[0] == 1:
    label = "Spam"
else:
    label = "Not Spam"
print(f"Predicted Label: {label}")


Predicted Label: Spam
