In [26]:
!pip install gradio
# Import pandas
import pandas as pd
# Import the required dependencies from sklearn
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Set the column width to view the text message data.
pd.set_option('max_colwidth', 200)

# Import Gradio
import gradio as gr



In [27]:
def sms_classification(sms_text_df):
    """
    Perform SMS classification using a pipeline with TF-IDF vectorization and Linear Support Vector Classification.

    Parameters:
    - sms_text_df (pd.DataFrame): DataFrame containing 'text_message' and 'label' columns for SMS classification.

    Returns:
    - text_clf (Pipeline): Fitted pipeline model for SMS classification.

    This function takes a DataFrame with 'text_message' and 'label' columns, splits the data into
    training and testing sets, builds a pipeline with TF-IDF vectorization and Linear Support Vector
    Classification, and fits the model to the training data.
    The fitted pipeline is returned to make future predictions.
    """
    # Set the features variable to the text message column.
    features = sms_text_df['text_message']
    # Set the target variable to the "label" column.
    target = sms_text_df['label']

    # Split data into training and testing and set the test_size = 33%
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.33, random_state=42, stratify=target
    )  # Added stratify=target

    # Build a pipeline to transform the test set to compare to the training set.
    text_clf = Pipeline([
        ('tfidf', TfidfVectorizer(stop_words='english', ngram_range=(1, 2))),
        ('classifier', LinearSVC())
    ])
    # Fit the model to the transformed training data and return model.
    text_clf.fit(X_train, y_train)

    return text_clf

In [28]:
# Load the dataset into a DataFrame
sms_df = pd.read_csv('SMSSpamCollection.csv', sep='\t', names=['label', 'text_message'])

# Checking missing value
print(f"Number of missing values in text_message: {sms_df['text_message']. isna().sum()}")

# Dropping rows with missing values
sms_df = sms_df.dropna(subset=['text_message'])

# Display few rows to verify
sms_df.head()

Number of missing values in text_message: 5573


Unnamed: 0,label,text_message
5083,ham,"Yeah, give me a call if you've got a minute"
5084,ham,"HI BABE UAWAKE?FEELLIKW SHIT.JUSTFOUND OUT VIA ALETTER THATMUM GOTMARRIED 4thNOV.BEHIND OURBACKS  FUCKINNICE!SELFISH,DEVIOUSBITCH.ANYWAY,IL CALL U"""""""


In [18]:
# distribution of labels in the dataset
print(sms_df['label'].value_counts())

label
ham    2
Name: count, dtype: int64


In [29]:
# Create a small test dataset with both ham and spam messages
test_data = {
    'label': ['ham', 'ham', 'spam', 'ham', 'spam', 'spam', 'ham', 'ham'],
    'text_message': [
        'Hey, how are you doing today?',
        'Don\'t forget to pick up milk on your way home',
        'URGENT: You have won $5,000! Call now to claim your prize!',
        'Meeting at 3pm in conference room B',
        'Congratulations! You\'ve been selected for a free vacation! Text YES to claim.',
        'Your account has been locked. Reply with your password to reactivate',
        'I\'ll be there in about 15 minutes',
        'What time does the movie start?'
    ]
}

sms_df = pd.DataFrame(test_data)
print(sms_df['label'].value_counts())

label
ham     5
spam    3
Name: count, dtype: int64


In [30]:
# Call the sms_classification function with the DataFrame and set the result to the "text_clf" variable
text_clf = sms_classification(sms_df)

In [31]:
# Create a function called `sms_prediction` that takes in the SMS text and predicts the whether the text is "not spam" or "spam".
# The function should return the SMS message, and say whether the text is "not spam" or "spam".
def sms_prediction(text):
    """
    Predict the spam/ham classification of a given text message using a pre-trained model.

    Parameters:
    - text (str): The text message to be classified.

    Returns:
    - str: A message indicating whether the text message is classified as spam or not.

    This function takes a text message and a pre-trained pipeline model, then predicts the
    spam/ham classification of the text. The result is a message stating whether the text is
    classified as spam or not.
    """
    # Create a variable that will hold the prediction of a new text.
    prediction = text_clf.predict([text])[0]
    # Using a conditional if the prediction is "ham" return the message:
    # f'The text message: "{text}", is not spam.' Else, return f'The text message: "{text}", is spam.'
    if prediction == 'ham':
      return f'The text message: "{text}", is not spam.'
    else:
       return f'The text message: "{text}", is spam.'

In [32]:
# Create a sms_app that takes a textbox for the inputs and has a textbox for the output.
# Povide labels for each textbox.
sms_app = gr.Interface(
    fn=sms_prediction,
    inputs=gr.Textbox(label="SMS Message", placeholder="Enter a text message to classify..."), # Changed 'lable' to 'label'
    outputs=gr.Textbox(label="Classification Result"), # Added outputs argument
    title="SMS Spam Detector",
    description="Enter an SMS message to find out if it's spam or not.",
    examples=[
        ["You are a lucky winner of $5000"],
        ["You won 2 free tickets to the Super Bowl"],
        ["You won 2 free tickets to the Super Bowl text us to claim your prize"],
        ["Thanks for registering. Text 4343 to receive free updates on medicare"],


    ]
)

# Launch the app.
sms_app.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a00746536a64de5500.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




## Test the following text messages.

---

1. You are a lucky winner of $5000!
**Not Spam**
2. You won 2 free tickets to the Super Bowl.
**Spam**
3. You won 2 free tickets to the Super Bowl text us to claim your prize.
**Spam**
4. Thanks for registering. Text 4343 to receive free updates on medicare.
**Spam**