In [2]:
# Importing libraries
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Toy corpus (swap in a real dataset here). Each entry pairs a message with
# its label so the text and its class stay side by side.
labelled_messages = [
    ('Free money now!!!', 'spam'),
    ('Meeting at 3 PM', 'ham'),
    ('Get cheap loans now', 'spam'),
    ('Hey, how are you?', 'ham'),
    ('You have won a lottery!', 'spam'),
    ('Let\'s catch up tomorrow', 'ham'),
    ('Congrats, you are eligible for a discount', 'spam'),
    ('How about coffee tomorrow?', 'ham'),
    ('Call me for your prize', 'spam'),
    ('Can we reschedule our meeting?', 'ham'),
]

# Column-oriented view of the same records: one list of texts, one of labels.
data = {
    'message': [text for text, _ in labelled_messages],
    'label': [tag for _, tag in labelled_messages],
}

# Tabular form of the corpus.
df = pd.DataFrame(data)

# Features (X) are the raw message strings; the target (y) is the label column.
X, y = df['message'], df['label']

# Split the raw messages first (80% train, 20% test) so the held-out messages
# cannot influence anything that is fitted below. stratify=y keeps both classes
# represented in the test split, which matters with so few samples.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the bag-of-words vocabulary from the training messages only. Fitting on
# the full corpus would leak test-set words into the feature space and make the
# evaluation optimistic. stop_words='english' drops common English words.
vectorizer = CountVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(X_train_text)
# The test messages are only transformed: words unseen during fit are ignored.
X_test = vectorizer.transform(X_test_text)

# Whole corpus encoded with the train-fitted vocabulary; kept under its
# original name in case a later cell reads it.
X_vec = vectorizer.transform(X)

# Train a Logistic Regression model on the training split
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict on the held-out test split
y_pred = model.predict(X_test)

# Evaluate the model
print(f'Accuracy: {accuracy_score(y_test, y_pred):.4f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))



Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

         ham       1.00      1.00      1.00         1
        spam       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [3]:
# Unseen messages used to spot-check the trained classifier
new_messages = [
    "You have a meeting scheduled at 10 AM tomorrow",
    "Claim your free vacation now, call us today!",
    "Are we still on for lunch today?",
    "Limited time offer on electronics, shop now!"
]

# Encode the messages with the already-fitted vectorizer (transform only, no
# refit), so they are counted against the vocabulary learned earlier.
new_messages_vec = vectorizer.transform(new_messages)

# One predicted label per message, in the same order as new_messages
predictions = model.predict(new_messages_vec)

# Report each message with its predicted label, followed by a 50-dash rule
for message, prediction in zip(new_messages, predictions):
    print(
        f"Message: {message}",
        f"Prediction: {prediction}",
        "-" * 50,
        sep="\n",
    )


Message: You have a meeting scheduled at 10 AM tomorrow
Prediction: ham
--------------------------------------------------
Message: Claim your free vacation now, call us today!
Prediction: spam
--------------------------------------------------
Message: Are we still on for lunch today?
Prediction: spam
--------------------------------------------------
Message: Limited time offer on electronics, shop now!
Prediction: spam
--------------------------------------------------
