In [140]:
# Import pandas
import pandas as pd
# Import the required dependencies from sklearn
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Widen the displayed column width so long text messages are not truncated.
# The fully qualified option name is used instead of the abbreviated 'max_colwidth',
# which only resolves through pandas' pattern matching of option names.
pd.set_option('display.max_colwidth', 200)

# Import Gradio
import gradio as gr

In [141]:
def sms_classification(sms_text_df):
    """
    Train an SMS classifier using a pipeline with TF-IDF vectorization and Linear Support Vector Classification.

    Parameters:
    - sms_text_df (pd.DataFrame): DataFrame containing 'text_message' and 'label' columns for SMS classification.

    Returns:
    - tuple: (text_clf, X_train, X_test, y_train, y_test), where
        - text_clf (Pipeline): Pipeline fitted on the training split.
        - X_train, X_test: Training and testing portions of the 'text_message' column.
        - y_train, y_test: Training and testing portions of the 'label' column.

    Raises:
    - KeyError: If 'text_message' or 'label' is missing from sms_text_df.

    This function takes a DataFrame with 'text_message' and 'label' columns, splits the data into
    training (67%) and testing (33%) sets with a fixed random_state, builds a pipeline with TF-IDF
    vectorization and Linear Support Vector Classification, and fits the pipeline to the training data.
    The fitted pipeline is returned together with the splits so the caller can score it and
    make future predictions.
    """
    # Fail early with a message that names the missing column(s).
    required_columns = ("text_message", "label")
    missing_columns = [name for name in required_columns if name not in sms_text_df.columns]
    if missing_columns:
        raise KeyError(f"sms_text_df is missing required column(s): {missing_columns}")

    # Set the features variable to the text message column.
    X = sms_text_df["text_message"]
    # Set the target variable to the "label" column.
    y = sms_text_df["label"]

    # Split data into training and testing and set the test_size = 33%.
    # random_state is fixed so the split is the same on every run.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    # Build a pipeline that vectorizes raw text with TF-IDF (English stop words removed)
    # and feeds the resulting features to a linear SVM classifier.
    text_clf = Pipeline([
        ('tfidf', TfidfVectorizer(stop_words='english')),
        ('clf', LinearSVC()),
    ])

    # Fit the pipeline to the raw training text; the vectorizer is fitted on the training split only.
    text_clf.fit(X_train, y_train)

    return text_clf, X_train, X_test, y_train, y_test

In [142]:
# Load the dataset into a DataFrame
messages_df=pd.read_csv("./Resources/SMSSpamCollection.csv")

# Preview the first rows.
display(messages_df.head())

# DataFrame.info() prints its own summary and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line to the cell output.
messages_df.info()

# Show how many messages carry each label (last expression, so it is the cell output).
messages_df["label"].value_counts()

Unnamed: 0,label,text_message
0,ham,"Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives around here though"


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5572 entries, 0 to 5571
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   label         5572 non-null   object
 1   text_message  5572 non-null   object
dtypes: object(2)
memory usage: 87.2+ KB
None


label
ham     4825
spam     747
Name: count, dtype: int64

In [143]:
# Train the classifier: the call yields the fitted pipeline plus the train/test splits.
text_clf, X_train, X_test, y_train, y_test = sms_classification(messages_df)

# Validate the model by scoring the fitted pipeline on each split.
train_accuracy = text_clf.score(X_train, y_train)
test_accuracy = text_clf.score(X_test, y_test)

# Show the pipeline, then report both accuracies to three decimals.
display(text_clf)
for split_name, split_accuracy in (("training", train_accuracy), ("test", test_accuracy)):
    print(f"The {split_name} accuracy is: {split_accuracy:.3f}")

The training accuracy is: 1.000
The test accuracy is: 0.989


In [None]:
# Create a function called `sms_prediction` that takes in the SMS text and predicts whether the text is "not spam" or "spam".
# Depending on `mode`, the function returns the message sentence, the classification label, or both.
def sms_prediction(text,mode,html=False):
    """
    Predict the spam/ham classification of a given text message using the fitted `text_clf` pipeline.

    Parameters:
    - text (str): The text message to be classified.
    - mode (str): Selects what is returned:
        - "full": the tuple (message, label)
        - "message": the message only
        - any other value: the label only
    - html (bool): When falsy (default) the message is a plain sentence and the label is the raw
      prediction. When truthy both are HTML snippets, with `text` HTML-escaped before it is
      embedded in the markup.

    Returns:
    - str or tuple: The message, the label, or (message, label), as selected by `mode`.

    This function reads the module-level `text_clf` pipeline, predicts the spam/ham classification
    of the text, and builds a message stating whether the text is classified as spam or not.
    Any prediction other than "ham" is reported as spam.
    """
    # Imported locally under an alias because the `html` parameter shadows the module name.
    from html import escape as escape_html

    # Create a variable that will hold the prediction of a new text.
    prediction=text_clf.predict([text])[0]
    is_ham = prediction=="ham"

    if not html:
        # Plain mode: the label is the raw prediction and the message is a simple sentence.
        prediction_label=prediction
        if is_ham:
            prediction_text=(f'The text message: "{text}", is not spam.')
        else:
            prediction_text=(f'The text message: "{text}", is spam.')

    else:
        # The message comes from the user, so escape it before embedding it in markup;
        # otherwise tags typed into the textbox would be rendered by the HTML component.
        safe_text = escape_html(str(text))

        if is_ham:
            prediction_label='''
            <div style="border:2px solid #28a745; padding:10px; border-radius:10px;">
            <h4 style="margin:0 0 10px 0;">Classification Result:</h4>
            <span style="color:#28a745; font-weight:bold;">NOT SPAM</span>
            </div>'''
            prediction_text= f'''
            <div style="border:2px solid #28a745; padding:10px; border-radius:10px;">
            <h4 style="margin:0 0 10px 0;">Our app has determined:</h4>
            The text message: "{safe_text}", is <span style="color:#28a745;font-weight:bold;">not spam</span>.
            </div>'''

        else:
            prediction_label='''
            <div style="border:2px solid Red; padding:10px; border-radius:10px;">
            <h4 style="margin:0 0 10px 0;">Classification Result:</h4>
            <span style="color:Red; font-weight:bold;">SPAM</span>
            </div>'''
            prediction_text = f'''
            <div style="border:2px solid Red; padding:10px; border-radius:10px;">
            <h4 style="margin:0 0 10px 0;">Our app has determined:</h4>
            The text message: "{safe_text}" is <span style="color:red; font-weight:bold;">spam</span>.
            </div>'''

    # Select what to return based on the requested mode.
    if mode=="full":
        output=prediction_text, prediction_label
    elif mode=="message":
        output=prediction_text
    else:
        output=prediction_label

    return output
    

In [145]:
# Test outside Gradio for functionality and comparison.
text1 = "You are a lucky winner of $5000!!"
text2 = "You won 2 free tickets to the Super Bowl."
text3 = "You won 2 free tickets to the Super Bowl text us to claim your prize."
text4 = "Thanks for registering. Text 4343 to receive free updates on medicare."

# Gather the test messages into a list.
test_list = [text1, text2, text3, text4]

# Classify every message once; "full" mode yields a (message, label) pair per text.
results = [sms_prediction(message, "full", html=False) for message in test_list]

# Split the pairs into the label list and the message list.
prediction_list = [label for _, label in results]
prediction_message_list = [sentence for sentence, _ in results]

print(prediction_list)
print("\n".join(prediction_message_list))

['ham', 'ham', 'spam', 'spam']
The text message: "You are a lucky winner of $5000!!", is not spam.
The text message: "You won 2 free tickets to the Super Bowl.", is not spam.
The text message: "You won 2 free tickets to the Super Bowl text us to claim your prize.", is spam.
The text message: "Thanks for registering. Text 4343 to receive free updates on medicare.", is spam.


In [148]:
# Create a sms_app that takes a textbox for the input and has a textbox for the output.
# Provide labels for each textbox.
sms_app=gr.Interface(
    # Plain-text mode: return only the sentence describing the verdict.
    fn=lambda text: sms_prediction(text, "message", html=False),
    inputs=[
        # Label matches the HTML app's input label (no stray period after the question mark).
        gr.Textbox(label="What is the text message you want to test?")],
    outputs=[
        gr.Textbox(label="Our app has determined:")]
)

# Launch the app; show_error=True surfaces errors raised by the function in the UI.
sms_app.launch(show_error=True)

* Running on local URL:  http://127.0.0.1:7876

To create a public link, set `share=True` in `launch()`.




## Test the following text messages. 

---
### See cells 6 and 8. Results are consistent with and without the app(s).
1. You are a lucky winner of $5000! --> not spam
2. You won 2 free tickets to the Super Bowl.  --> not spam
3. You won 2 free tickets to the Super Bowl text us to claim your prize. --> spam
4. Thanks for registering. Text 4343 to receive free updates on medicare.  --> spam

In [153]:
# Gradio app with HTML output.
# Input: one labelled textbox. Outputs: two HTML panels (the message, then the label),
# matching the (message, label) pair returned by sms_prediction in "full" mode.
sms_app_html = gr.Interface(
    inputs=[gr.Textbox(label="What is the text message you want to test?")],
    outputs=[gr.HTML(), gr.HTML()],
    fn=lambda text: sms_prediction(text, "full", html=True),
)

# Launch the app and request a public share link.
sms_app_html.launch(share=True, show_error=True)

* Running on local URL:  http://127.0.0.1:7880

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


