In [23]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [2]:
df = pd.read_csv("Spam_SMS.csv")
df.head()

Unnamed: 0,Class,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [3]:
print(df.shape)

(5574, 2)


In [7]:
# checking data types of the columns
print(df.dtypes)

Class      object
Message    object
dtype: object


In [6]:
# Checking for duplicate data
duplicate_data = df.duplicated().sum()
print(duplicate_data)

415


In [9]:
df = df.drop_duplicates()

In [10]:
# Checking for duplicate data
duplicate_data = df.duplicated().sum()
print(duplicate_data)

0


In [8]:
# check for missing values
missing_val = df.isnull().sum()
print(missing_val)

Class      0
Message    0
dtype: int64


In [16]:
# Tokenization and stopword removal using TfidfVectorizer
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(df['Message'])

# Label Encoding for 'ham' and 'spam'
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Class'])

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [21]:
# Initialize model
logistic_model = LogisticRegression()

# Train model
logistic_model.fit(X_train, y_train)

# Make predictions
logistic_predictions = logistic_model.predict(X_test)

In [24]:
# Evaluate model
print("Logistic Regression:")
print(classification_report(y_test, logistic_predictions))
print("Accuracy:", accuracy_score(y_test, logistic_predictions))

Logistic Regression:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       465
           1       0.97      0.75      0.84        51

    accuracy                           0.97       516
   macro avg       0.97      0.87      0.91       516
weighted avg       0.97      0.97      0.97       516

Accuracy: 0.9728682170542635


In [25]:
def predict_message(message):
    # Preprocess the input message
    message_transformed = vectorizer.transform([message])

    # Predict using the trained Logistic Regression model
    prediction = logistic_model.predict(message_transformed)

    # Convert the numerical prediction back to "ham" or "spam"
    if prediction == 0:
        return "ham"
    else:
        return "spam"

# Test the model with user input
user_message = input("Enter a message to classify as 'ham' or 'spam': ")
print(f'The message is: {predict_message(user_message)}')


Enter a message to classify as 'ham' or 'spam':  Congratulations! You’ve been selected to win a free iPhone! Click the link to claim your prize now: [link]. Hurry, offer ends soon!


The message is: spam


In [26]:
import joblib

# Save the trained Logistic Regression model
joblib.dump(logistic_model, 'spam_classifier_model.pkl')

# Save the vectorizer
joblib.dump(vectorizer, 'vectorizer.pkl')


['vectorizer.pkl']

In [None]:
import joblib

# Load the saved model and vectorizer
loaded_model = joblib.load('spam_classifier_model.pkl')
loaded_vectorizer = joblib.load('vectorizer.pkl')

def predict_message(message):
    # Preprocess the input message
    message_transformed = loaded_vectorizer.transform([message])

    # Predict using the loaded model
    prediction = loaded_model.predict(message_transformed)

    # Convert the numerical prediction back to "ham" or "spam"
    return "ham" if prediction == 0 else "spam"

# Infinite loop to take user input and make predictions
while True:
    user_message = input("Enter a message to classify as 'ham' or 'spam' (type 'exit' to quit): ")
    
    if user_message.lower() == 'exit':
        break
    
    print(f'The message is: {predict_message(user_message)}')


Enter a message to classify as 'ham' or 'spam' (type 'exit' to quit):  hello


The message is: ham


Enter a message to classify as 'ham' or 'spam' (type 'exit' to quit):  Claim your $1000 gift card now! Click here to redeem.


The message is: spam


Enter a message to classify as 'ham' or 'spam' (type 'exit' to quit):  You’ve won a free vacation! Call now to claim your prize


The message is: spam


Enter a message to classify as 'ham' or 'spam' (type 'exit' to quit):  URGENT: Your account has been compromised. Click here to reset your password


The message is: ham


Enter a message to classify as 'ham' or 'spam' (type 'exit' to quit):  Limited offer! Get a loan with zero interest. Apply now!


The message is: ham


Enter a message to classify as 'ham' or 'spam' (type 'exit' to quit):  Free trial of our weight loss program. Sign up today!


The message is: ham


Enter a message to classify as 'ham' or 'spam' (type 'exit' to quit):  You’ve been pre-approved for a credit card. Act fast!


The message is: ham


Enter a message to classify as 'ham' or 'spam' (type 'exit' to quit):  Get rich quick with this amazing opportunity


The message is: ham


Enter a message to classify as 'ham' or 'spam' (type 'exit' to quit):  Download this app to earn rewards instantly!


The message is: ham
