In [None]:
import re
import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Define the preprocessing function (must be identical to training preprocessing)
def preprocess(tweet):
    """Normalize a raw tweet into lowercase, letters-only text.

    Steps, in order:
      1. Drop @mentions and URLs (tokens starting with ``http`` or ``www``).
      2. Drop every character that is not an ASCII letter or whitespace.
      3. Lowercase the text.
      4. Collapse runs of whitespace to a single space and trim the ends.

    Args:
        tweet: The raw tweet text (str).

    Returns:
        The cleaned tweet as a str; may be empty if nothing survives cleaning.

    NOTE(review): the mention/URL pattern previously read
    ``@\\w+httpS+|www\\S+`` (missing ``|`` and a missing backslash), so
    mentions and http URLs were never removed as whole tokens. If the
    training pipeline used that same broken pattern, update it too and
    retrain -- inference preprocessing must match training preprocessing.
    """
    # Remove mentions (@user) and URLs as whole tokens *before* stripping
    # punctuation; otherwise their letters survive as junk words.
    tweet = re.sub(r'@\w+|http\S+|www\S+', '', tweet)
    # Remove special characters and numbers, keep only letters and spaces
    tweet = re.sub(r'[^a-zA-Z\s]', '', tweet)
    # Convert to lowercase
    tweet = tweet.lower()
    # Remove extra spaces
    tweet = re.sub(r'\s+', ' ', tweet).strip()
    return tweet

# Prediction function using the loaded tokenizer and model
def predict_sentiment(tweet, tokenizer, model, max_length=50):
    """Classify a single tweet as "Positive" or "Negative".

    The tweet is cleaned with ``preprocess``, converted to integer ids with
    ``tokenizer.texts_to_sequences``, post-padded/post-truncated to
    ``max_length``, and passed to ``model.predict``.

    Args:
        tweet: Raw tweet text.
        tokenizer: Object exposing ``texts_to_sequences``.
        model: Object exposing ``predict``; element ``[0][0]`` of its output
            is used as the score.
        max_length: Padded sequence length handed to the model
            (presumably the length used at training time -- verify).

    Returns:
        A ``(sentiment, probability)`` tuple. ``sentiment`` is "Positive" when
        the score is >= 0.5, otherwise "Negative"; ``probability`` is the score
        for "Positive" and ``1 - score`` for "Negative".
    """
    # Clean the text and encode it as a batch containing one sequence.
    token_ids = tokenizer.texts_to_sequences([preprocess(tweet)])

    # Pad/truncate at the end so the input has exactly max_length entries.
    model_input = pad_sequences(
        token_ids, maxlen=max_length, padding='post', truncating='post'
    )

    # Single input row, single output value.
    score = model.predict(model_input)[0][0]

    # Report the confidence of whichever label was chosen.
    if score >= 0.5:
        return "Positive", score
    return "Negative", 1 - score

# Script entry point: load the saved artifacts and run a sample prediction
if __name__ == "__main__":
    # Load the saved Keras model from the current working directory.
    model = load_model("twitter_sentiment_train.h5")
    
    # Load the saved tokenizer.
    # SECURITY: pickle.load can execute arbitrary code; only unpickle a
    # tokenizer file that you created yourself or otherwise fully trust.
    with open("tokenizer.pickle", "rb") as handle:
        tokenizer = pickle.load(handle)
    
    # Example tweet for prediction (uses predict_sentiment's default max_length).
    # NOTE(review): the saved output for this clearly positive tweet is
    # "Negative" with ~0.99 confidence -- confirm that preprocessing,
    # max_length and padding side match what was used during training.
    new_tweet = "This is a wonderful day"
    sentiment, probability = predict_sentiment(new_tweet, tokenizer, model)
    
    # Show the chosen label and the confidence in that label.
    print(f"Sentiment: {sentiment}, Probability: {probability}")


Sentiment: Negative, Probability: 0.9880631268024445


## Using a Hugging Face model

In [3]:
from transformers import pipeline

# Pin the checkpoint explicitly instead of relying on the pipeline's implicit
# default: the default can change between transformers releases, and omitting
# it triggers the "No model was supplied" warning. This is the same model the
# unpinned call resolved to.
sentiment_analyzer = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)

# Single-sentence example: the pipeline returns a list with one result dict
# (keys 'label' and 'score') per input, so take the first element.
sentence = "The quality is terrible, and the customer support was unhelpful."

result = sentiment_analyzer(sentence)[0]

print(sentence)
print(f"{result['label']}:(score:{result['score']:.3f})")

# Example usage with several sentences
sentences = [
    "I like this guy very much!",
    "This was a very bad experience."
]

# Passing the whole list yields one result dict per sentence, in input order.
for sentence, result in zip(sentences, sentiment_analyzer(sentences)):
    print(f"Sentence: '{sentence}'")
    print(f"Sentiment: {result['label']} (Score: {result['score']:.4f})\n")


  from .autonotebook import tqdm as notebook_tqdm
No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


The quality is terrible, and the customer support was unhelpful.
NEGATIVE:(score:1.000)
Sentence: 'I like this guy very much!'
Sentiment: POSITIVE (Score: 0.9999)

Sentence: 'This was a very bad experience.'
Sentiment: NEGATIVE (Score: 0.9998)

