<a href="https://colab.research.google.com/github/Santhiya1005/AI_Projects/blob/main/SpamorNotSpam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Spam / not-spam classifier: TF-IDF features + logistic regression.
#
# Pipeline: load CSV -> pick text/label columns -> clean -> split ->
# fit TF-IDF on the training fold only -> train -> evaluate -> predict.

# 1. Load dataset
data = pd.read_csv("/content/sample_data/spam_or_not_spam.csv")

# 2. Show first rows & columns (for safety)
print(data.head())
print(data.columns)

# 3. AUTO select text column (first object/text column)
# NOTE: this heuristic assumes the message text is the first object-dtype
# column and that the file has exactly one other column holding the label.
text_col = data.select_dtypes(include=["object"]).columns[0]
label_col = data.columns.difference([text_col])[0]

# 4. Rename properly
# .copy() gives an independent frame so the column assignments below do not
# operate on a slice of the original (avoids SettingWithCopyWarning).
data = data[[label_col, text_col]].copy()
data.columns = ["label", "message"]

# 5. Convert label to numeric (if needed)
# Rows without a label cannot be used for supervised training.
data = data.dropna(subset=["label"])
if not pd.api.types.is_numeric_dtype(data["label"]):
    # Normalise case/whitespace so "Spam" or " ham " still map correctly.
    normalized_labels = data["label"].astype(str).str.strip().str.lower()
    mapped_labels = normalized_labels.map({"spam": 1, "ham": 0})
    if mapped_labels.isna().any():
        # Fail loudly here instead of passing NaN targets to the model.
        unknown = sorted(normalized_labels[mapped_labels.isna()].unique())
        raise ValueError(f"Unrecognised label values: {unknown}")
    data["label"] = mapped_labels.astype(int)

# 6. Convert message to string
# Fill missing messages first; astype(str) alone would turn NaN into the
# literal token "nan" and feed it to the vectorizer.
data["message"] = data["message"].fillna("").astype(str)

y = data["label"]

# 7. Train-test split (on the raw text, BEFORE vectorizing)
# random_state makes the reported accuracy reproducible; stratify keeps the
# spam/ham ratio the same in both folds.
messages_train, messages_test, y_train, y_test = train_test_split(
    data["message"],
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# 8. TF-IDF
# Fit the vocabulary and IDF weights on the training fold only, then reuse
# them for the test fold, so no information from the test set leaks into
# the features.
vec = TfidfVectorizer()
X_train = vec.fit_transform(messages_train)
X_test = vec.transform(messages_test)

# 9. Train logistic regression
model = LogisticRegression()
model.fit(X_train, y_train)

# 10. Accuracy
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# 11. Predict new message
msg = ["Free recharge offer just for you"]
msg_vec = vec.transform(msg)

print("Prediction:", "SPAM" if model.predict(msg_vec)[0] == 1 else "NOT SPAM")


                                               email  label
0   date wed NUMBER aug NUMBER NUMBER NUMBER NUMB...      0
1  martin a posted tassos papadopoulos the greek ...      0
2  man threatens explosion in moscow thursday aug...      0
3  klez the virus that won t die already the most...      0
4   in adding cream to spaghetti carbonara which ...      0
Index(['email', 'label'], dtype='object')
Accuracy: 0.96
Prediction: NOT SPAM
