In [47]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [49]:
# Toy corpus: five hand-written messages, one per category.
# Placeholder only (replace with a real dataset).
data = dict(
    message=[
        "Your OTP is 123456",
        "Win $1000 now! Click this link!",
        "Your package will arrive tomorrow.",
        "Urgent! Update your account details.",
        "Power bill due: Pay $50 now.",
    ],
    # Category names, aligned position-by-position with the messages above
    label=["otp", "spam", "logistic", "fraud", "transactional"],
)

# Two string columns: "message" (raw text) and "label" (category name)
df = pd.DataFrame(data)

In [51]:
# Convert labels to numeric
label_map = {"spam": 0, "fraud": 1, "otp": 2, "logistic": 3, "transactional": 4}

# Encode idempotently: category names are translated to their codes, while
# values that are already codes pass through unchanged. A plain
# .map(label_map) would turn every label into NaN if this cell were executed
# a second time, because the integer codes are not keys of label_map.
encoded_labels = df['label'].map(lambda value: label_map.get(value, value))

# Fail loudly on anything that is neither a known name nor a known code,
# instead of silently handing NaN / stray strings to the classifier.
unrecognized = encoded_labels[~encoded_labels.isin(list(label_map.values()))]
if not unrecognized.empty:
    raise ValueError(f"Unrecognized label values: {unrecognized.unique().tolist()}")
df['label'] = encoded_labels.astype("int64")

# Train-test split (80% train / 20% test, fixed seed for reproducibility)
# NOTE: with the 5-row sample data (one row per class) the single held-out row
# belongs to a class the model never sees during training, so the evaluation
# below is not meaningful until a real dataset is plugged in.
X_train, X_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)

In [53]:
# TF-IDF features: the vectorizer is fitted on the training split only
# (vocabulary capped via max_features=500); both splits are then projected
# through that same fitted vectorizer.
vectorizer = TfidfVectorizer(max_features=500)
vectorizer.fit(X_train)
X_train_tfidf = vectorizer.transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [55]:
# Logistic Regression Model: fit on the TF-IDF features of the training split
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

# Predictions for the held-out split
y_pred = model.predict(X_test_tfidf)

# Evaluation
# zero_division=0 reports precision/recall/F1 as 0 for classes that have no
# predicted or no true samples -- the same numbers as the default setting, but
# without emitting an UndefinedMetricWarning for each affected metric.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))



Accuracy: 0.0
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           4       0.00      0.00      0.00       0.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [13]:
def classify_message(message):
    """Predict the category name for a single text message.

    The message is vectorized with the module-level ``vectorizer``, the
    module-level ``model`` predicts a numeric class for it, and that number is
    translated back to its category name through ``label_map``.

    Args:
        message: The raw message text to classify.

    Returns:
        The ``label_map`` key whose value equals the predicted class.
    """
    # The vectorizer expects an iterable of documents, hence the one-item list
    features = vectorizer.transform([message])
    predicted_code = model.predict(features)[0]

    # Invert label_map (name -> code) so the predicted code can be looked up
    code_to_name = dict(zip(label_map.values(), label_map.keys()))
    return code_to_name[predicted_code]

# Smoke-test the classifier on a few messages that are not in the training data
test_messages = [
    "Your OTP is 789123",
    "Congratulations! You've won a free vacation.",
    "Your order has been shipped. Track it here.",
]

# Print one line per message: the input text and the predicted category name
for msg in test_messages:
    report_line = f"Message: '{msg}' -> Classified as: {classify_message(msg)}"
    print(report_line)


Message: 'Your OTP is 789123' -> Classified as: otp
Message: 'Congratulations! You've won a free vacation.' -> Classified as: transactional
Message: 'Your order has been shipped. Track it here.' -> Classified as: otp
