In [1]:
import pandas as pd
import gradio as gr
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

#from sklearn.model_selection import train_test_split
#from sklearn.pipeline import Pipeline
#from sklearn.svm import LinearSVC


In [2]:
# Set the column width to view the text message data.
pd.set_option('max_colwidth', 200)

In [3]:
# Restore models using pickle

# Restore the trained PassiveAggressiveClassifer model
pac_model = pickle.load(open('Resources/pa_classfier.pkl', 'rb'))

# Restore the TD-IDF vectorizer
tfid_vectorizer = pickle.load(open('Resources/tfid_vectorizer.pkl', 'rb'))

print("pac_model and tfid_vectorizer restored")

pac_model and tfid_vectorizer restored


In [40]:
# Define function to clean articles by convering all text to lower case, removing unnecessary punctuation,
# removing numbers, stopwords, tokenizing, and Lemmatizing the data. 
def clean_text(text):
    """
    XXXXXXXPredict the spam/ham classification of a given text message using a pre-trained model.

    XXXXParameters:
    XXXXX- text (str): The text message to be classified.

    XXXXXReturns:
    XXXXX- str: A message indicating whether the text message is classified as spam or not.

    XXXXThis function takes a text message and a pre-trained pipeline model, then predicts the
    XXXXspam/ham classification of the text. The result is a message stating whether the text is
    XXXXclassified as spam or not.
    """
    # Convert text to lowercase
    text = text.lower()

    # Remove URLs
    text = re.sub(r'https?://\S+|www\.\S+', '', text)

    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)

    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text)

    # Remove numbers
    text = re.sub(r'\d+', '', text)

    # Tokenize the text
    words = word_tokenize(text)

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]
    
    # Initialize the WordNet lemmatizer
    lemmatizer = WordNetLemmatizer()
    
    # Lemmatize the words
    words = [lemmatizer.lemmatize(word) for word in words]

    # Join the words back into a single string
    cleaned_text = ' '.join(words)
    
    return cleaned_text


In [11]:
# Test clean_text function to confirm that it works as expected 
dirty_text = "This is an example sentence with some numbers like 123, and special characters !@#$, as well as URLs https://www.example.com"
cleaned_text = clean_text(dirty_text)
print(cleaned_text)

example sentence number like special character well url


In [46]:
# Define function to predict article. 
def article_prediction(text):
    """
    XXXXXXXPredict the spam/ham classification of a given text message using a pre-trained model.

    XXXXParameters:
    XXXXX- text (str): The text message to be classified.

    XXXXXReturns:
    XXXXX- str: A message indicating whether the text message is classified as spam or not.

    XXXXThis function takes a text message and a pre-trained pipeline model, then predicts the
    XXXXspam/ham classification of the text. The result is a message stating whether the text is
    XXXXclassified as spam or not.
    """
    # Clean text string 
    text_clean = clean_text(text)
    print(text_clean)
    
    # Convert text string to list for the Vectorizer 
    text_clean = [text_clean]
        
    # Vectorize text string 
    text_tfid = tfid_vectorizer.transform(text_clean)
    
    # Predict Real or Fake 
    text_prediction = pac_model.predict(text_tfid)[0]
    
    if text_prediction == 1:
        return 'Real'
    else:
        return 'Fake'


In [47]:
# Test article_prediction function with Real article to confirm that it works as expected 
pred_test_text = "DUBAI (Reuters) - The United Arab Emirates on Sunday denied a report that \
Yemen s Houthi group had fired a missile toward a nuclear plant in the UAE, state news agency \
WAM reported on its Twitter account. It quoted the UAE s emergency and crisis management department \
as saying the UAE possessed a missile defense system that could deal with any such threats and \
adding the al-Barakah nuclear plant was secure against all eventualities."

pred_test = article_prediction(pred_test_text)
print(pred_test)

dubai reuters united arab emirate sunday denied report yemen houthi group fired missile toward nuclear plant uae state news agency wam reported twitter account quoted uae emergency crisis management department saying uae possessed missile defense system could deal threat adding albarakah nuclear plant secure eventuality
Real


In [43]:
# Create function that measures the sentiment of a sentence using SentimentIntensityAnalyzer
def get_sentiment_rating(sentence):
    # Create a SentimentIntensityAnalyzer object
    vader_sentiment = SentimentIntensityAnalyzer()

    # The polarity_scores method of SentimentIntensityAnalyzer returns a sentiment dictionary
    # that contains positive, negative, neutral, and compound scores.
    vader_sentiment_dict = vader_sentiment.polarity_scores(sentence)
     
    #print(f'The overall sentiment dictionary is: {vader_sentiment_dict}')
    #print(f'sentence sentiment is rated {vader_sentiment_dict["neg"]*100}% Negative')
    #print(f'sentence sentiment is rated {vader_sentiment_dict["neu"]*100}% Neutral')
    #print(f'sentence sentiment is rated {vader_sentiment_dict["pos"]*100}% Positive')

    # Determine if sentiment is positive, negative or neutral
    if vader_sentiment_dict['compound'] >= 0.05 :
        sentiment_rating = 'Positive'
    elif vader_sentiment_dict['compound'] <= - 0.05 :
        sentiment_rating = 'Negative'
    else :
        sentiment_rating = 'Neutral'
    
    #print(f'Sentence Overall is rated {sentiment_rating}')
    return sentiment_rating


In [44]:
# Test get_sentiment_rating function
test_sentence = "Google is a great place to search for answers when you don't have any!"
test_result = get_sentiment_rating(test_sentence)
test_result

'Positive'

In [45]:
# Test get_sentiment_rating function
test_sentence = pred_test_text[:70]
test_result = get_sentiment_rating(test_sentence)
test_result

'Neutral'

In [35]:
# Create new cleaned_text column by applying the clean_text function to the article text 
#df_merged_articles['cleaned_text'] = df_merged_articles['text'].apply(clean_text)
#df_merged_articles.head()

In [36]:
# Load the dataset into a DataFrame
#sms_text_df = pd.read_csv('Resources/SMSSpamCollection.csv')
#sms_text_df.head()

In [37]:
# Call the sms_classification function with the DataFrame and set the result to the "text_clf" variable
#text_clf = sms_classification(sms_text_df)

In [48]:
# Create a function called `sms_prediction` that takes in the SMS text and predicts the whether the text is "not spam" or "spam". 
# The function should return the SMS message, and say whether the text is "not spam" or "spam".
def article_sentiment_prediction(text):
    """
    xxxxxxPredict the spam/ham classification of a given text message using a pre-trained model.

    ccccccccParameters:
    - text (str): The text message to be classified.

    xxxxxxxxxxReturns:
    - str: A message indicating whether the text message is classified as spam or not.

    xxxxxThis function takes a text message and a pre-trained pipeline model, then predicts the
    xxxxxspam/ham classification of the text. The result is a message stating whether the text is
    xxxclassified as spam or not.
    """
    # Get sentiment score
    text_sentiment = get_sentiment_rating(text)

    # Create a variable that will hold the prediction of a new text.
    text_prediction = article_prediction(text)
    
    # Using a conditional if the prediction is "ham" return the message:
    # f'The text message: "{text}", is not spam.' Else, return f'The text message: "{text}", is spam.'
    #if text_prediction == "Real":
    #    message = f'Text Sentiment: {text_sentiment};  The text is: {text_prediction}; Text Analyzed: "{text}".'
    #else:
    #    message = f'Text Sentiment: {text_sentiment};  The text is: {text_prediction}; Text Analyzed: "{text}".'
    message = f'Text Sentiment: {text_sentiment};  The text is: {text_prediction}; Text Analyzed: "{text}".'
    return message

In [49]:
# Test article_sentiment_prediction function
pred_test_text = "DUBAI (Reuters) - The United Arab Emirates on Sunday denied a report that \
Yemen s Houthi group had fired a missile toward a nuclear plant in the UAE, state news agency \
WAM reported on its Twitter account. It quoted the UAE s emergency and crisis management department \
as saying the UAE possessed a missile defense system that could deal with any such threats and \
adding the al-Barakah nuclear plant was secure against all eventualities."
test_result = article_sentiment_prediction(pred_test_text)
test_result

dubai reuters united arab emirate sunday denied report yemen houthi group fired missile toward nuclear plant uae state news agency wam reported twitter account quoted uae emergency crisis management department saying uae possessed missile defense system could deal threat adding albarakah nuclear plant secure eventuality


'Text Sentiment: Negative;  The text is: Real; Text Analyzed: "DUBAI (Reuters) - The United Arab Emirates on Sunday denied a report that Yemen s Houthi group had fired a missile toward a nuclear plant in the UAE, state news agency WAM reported on its Twitter account. It quoted the UAE s emergency and crisis management department as saying the UAE possessed a missile defense system that could deal with any such threats and adding the al-Barakah nuclear plant was secure against all eventualities.".'

In [50]:
# Create a sms_app that takes a textbox for the inputs and has a textbox for the output.  
# Povide labels for each textbox. 
app = gr.Interface(fn=article_sentiment_prediction,
                   inputs=gr.Textbox(label="What is the text that you want to test?"), 
                   outputs=gr.Textbox(label="Our app has determined:"))
    
# Launch the app.
app.launch(share=True)

Running on local URL:  http://127.0.0.1:7860

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.




washington reuters president donald trump name david kautter acting commissioner u internal revenue service white house said statement thursday kautter assistant secretary treasury tax policy would become acting head federal governmentâs revenue collection service effective nov white house said
armed civilian protect defend u military recruitment center muslim terrorist made clear intend target brave men woman defend nation really much ask allow defend themselvescivilians semiautomatic carbine eating chickfila guarding marine whitehouse pictwittercomofvtpsqeg bob owen bob_owens july ht weasel zipper


In [51]:
#Close gradio application when done testing
app.close()

Closing server running on port: 7860
