In [2]:
# Import pandas
import pandas as pd
# Import the required dependencies from sklearn
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Set the column width to view the text message data.
pd.set_option('max_colwidth', 200)

# Install and import Gradio
%pip install gradio
import gradio as gr

Collecting gradio
  Downloading gradio-5.12.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.4 (from gradio)
  Downloading gradio_client-1.5.4-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.27.1-py3-none-any.whl.metadata (13 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.14-cp310-cp310-win_amd64.whl.metadata (42 kB)
     ---------------------------------------- 0.0/42.9 kB ? eta -:--:--
     -------------------------------------- - 41.0/42.9 kB ? eta -:--:--
     -----------

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.32.0 requires protobuf<5,>=3.20, but you have protobuf 3.19.6 which is incompatible.


In [3]:
def sms_classification(sms_text_df, test_size=0.33, random_state=42):
    """
    Train an SMS spam classifier: TF-IDF vectorization followed by a linear SVM.

    Parameters:
    - sms_text_df (pd.DataFrame): DataFrame containing 'text_message' and 'label' columns.
    - test_size (float): Fraction of rows held out of training. Defaults to 0.33.
    - random_state (int): Seed used for both the train/test split and the
      LinearSVC solver so that repeated runs fit the same model. Defaults to 42.

    Returns:
    - text_clf (Pipeline): Pipeline fitted on the training portion of the data.

    The data is split into training and held-out portions; only the training
    portion is used to fit the pipeline. The held-out portion is not used or
    returned by this function.
    """
    # Features are the raw message texts; the target is the "label" column.
    features = sms_text_df['text_message']
    target = sms_text_df['label']

    # Split the data; the held-out portion is intentionally discarded here.
    X_train, _, y_train, _ = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    # Build a pipeline that converts text to TF-IDF features and feeds them to
    # a linear support vector classifier. Seeding the classifier keeps the
    # fitted model reproducible across runs.
    text_clf = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('clf', LinearSVC(random_state=random_state)),
    ])

    # Fit on the training data and return the fitted pipeline.
    text_clf.fit(X_train, y_train)
    return text_clf

In [12]:
# Path to the dataset, relative to the notebook's working directory, so the
# notebook is not tied to one machine's folder layout.
# NOTE(review): assumes the kernel runs from the folder that contains
# `Resources/` — adjust DATA_PATH if the notebook lives elsewhere.
DATA_PATH = 'Resources/SMSSpamCollection.csv'

# Load the dataset into a DataFrame
sms_text_df = pd.read_csv(DATA_PATH)
# Drop the unnecessary columns if they exist (missing ones are ignored)
columns_to_drop = ['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4']
sms_text_df = sms_text_df.drop(columns=columns_to_drop, errors='ignore')
# Rename the columns to 'label' and 'text_message'.
# This positional assignment requires exactly two columns to remain.
sms_text_df.columns = ['label', 'text_message']



In [13]:
# Train the classifier on the loaded DataFrame and keep the fitted pipeline in the
# module-level "text_clf" variable; `sms_prediction` reads this variable when predicting.
text_clf = sms_classification(sms_text_df)

In [14]:
# Create a function called `sms_prediction` that takes in the SMS text and predicts whether the text is "not spam" or "spam".
# The function should return the SMS message and say whether the text is "not spam" or "spam".
def sms_prediction(text):
    """
    Classify a single SMS message as spam or not spam.

    Parameters:
    - text (str): The text message to be classified.

    Returns:
    - str: A sentence quoting the message and stating whether it is spam or not.

    The prediction is made with the module-level `text_clf` pipeline, which must
    already be fitted before this function is called.
    """
    # `predict` operates on a batch, so wrap the message in a list and take the
    # single resulting label. Comparing the scalar label avoids relying on the
    # truthiness of a one-element array comparison.
    label = text_clf.predict([text])[0]

    # "ham" means not spam; any other label is reported as spam.
    if label == 'ham':
        return f'The text message: "{text}", is not spam.'
    return f'The text message: "{text}", is spam.'

In [15]:
# Create a sms_app that takes a textbox for the inputs and has a textbox for the output.
# Provide labels for each textbox.
sms_app = gr.Interface(
    fn=sms_prediction,
    inputs=gr.Textbox(lines=2, label="What is the text message you want to test?"),
    outputs=gr.Textbox(lines=2, label="Our app has determined:"),
    title="SMS Spam Detection",
)

# Launch the app. `share=True` also requests a temporary public URL in addition
# to the local one.
sms_app.launch(share=True)

* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://f2e2391296892d8de5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




## Test the following text messages. 

---

1. You are a lucky winner of $5000!
2. You won 2 free tickets to the Super Bowl.
3. You won 2 free tickets to the Super Bowl text us to claim your prize.
4. Thanks for registering. Text 4343 to receive free updates on medicare.

In [16]:
# Sample messages used to sanity-check the spam detector.
test_messages = [
    "You are a lucky winner of $5000!",
    "You won 2 free tickets to the Super Bowl.",
    "You won 2 free tickets to the Super Bowl text us to claim your prize.",
    "Thanks for registering. Text 4343 to receive free updates on medicare."
]

# Classify each sample in order and print the resulting sentence.
for sms in test_messages:
    print(sms_prediction(sms))


The text message: "You are a lucky winner of $5000!", is not spam.
The text message: "You won 2 free tickets to the Super Bowl.", is spam.
The text message: "You won 2 free tickets to the Super Bowl text us to claim your prize.", is spam.
The text message: "Thanks for registering. Text 4343 to receive free updates on medicare.", is spam.
