In [14]:
!pip install -q tensorflow numpy keras nltk pickle-mixin


[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
import numpy as np
import pandas as pd
import re
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download required NLTK data
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [16]:
# ============== DATA PREPROCESSING ==============

class TweetPreprocessor:
    def __init__(self):
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
    
    def clean_tweet(self, text):
        """Clean and preprocess tweet text"""
        if pd.isna(text):
            return ""
        
        # Convert to string and lowercase
        text = str(text).lower()
        
        # Remove URLs
        text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
        
        # Remove user mentions but keep hashtags as they might be meaningful
        text = re.sub(r'@\w+', '', text)
        
        # Remove hashtag symbol but keep the word
        text = re.sub(r'#', '', text)
        
        # Remove special characters and digits but keep spaces
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        
        # Remove extra whitespace
        text = ' '.join(text.split())
        
        # For gaming tweets, we might want to keep some gaming-specific terms
        # Don't remove all stopwords for better context in gaming tweets
        tokens = text.split()
        
        # Only remove very common stopwords that don't affect sentiment
        very_common_stops = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for'}
        tokens = [word for word in tokens if word not in very_common_stops or len(word) > 2]
        
        return ' '.join(tokens)

In [17]:
class SentimentPredictor:
    """Class for loading and using the saved model"""
    
    def __init__(self, model_name='twitter_sentiment'):
        # Load model
        self.model = load_model(f'{model_name}_model.keras')
        
        # Load tokenizer
        with open(f'{model_name}_tokenizer.pickle', 'rb') as handle:
            self.tokenizer = pickle.load(handle)
        
        # Load config
        with open(f'{model_name}_config.pickle', 'rb') as handle:
            self.config = pickle.load(handle)
        
        # Initialize preprocessor
        self.preprocessor = TweetPreprocessor()
        
        print("✅ Model loaded and ready for predictions!")
    
    def predict_sentiment(self, text):
        """Predict sentiment for a single text"""
        # Clean text
        cleaned = self.preprocessor.clean_tweet(text)
        
        # Tokenize and pad
        sequence = self.tokenizer.texts_to_sequences([cleaned])
        padded = pad_sequences(sequence, maxlen=self.config['max_len'], 
                             padding='post', truncating='post')
        
        # Predict
        prediction = self.model.predict(padded, verbose=0)
        predicted_class = np.argmax(prediction[0])
        confidence = prediction[0][predicted_class]
        
        return {
            'text': text,
            'cleaned_text': cleaned,
            'sentiment': self.config['sentiment_labels'][predicted_class],
            'confidence': float(confidence),
            'probabilities': {
                label: float(prob) 
                for label, prob in zip(self.config['sentiment_labels'], prediction[0])
            }
        }
    
    def predict_batch(self, texts):
        """Predict sentiment for multiple texts"""
        results = []
        for text in texts:
            results.append(self.predict_sentiment(text))
        return results

In [18]:
# ============== TEST THE SAVED MODEL ==============

print("\n" + "="*50)
print("TESTING SAVED MODEL")
print("="*50)

# Initialize predictor
predictor = SentimentPredictor('twitter_sentiment')

# Test with new tweets
test_tweets = [
    "This is the best day ever! So happy!",
    "I hate this so much, worst experience",
    "It's okay, nothing special really",
    "Absolutely love the new features!",
    "Complete disaster, want my money back"
]

print("\nSample Predictions:")
print("-" * 50)

for tweet in test_tweets:
    result = predictor.predict_sentiment(tweet)
    print(f"Tweet: {result['text']}")
    print(f"Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.2%})")
    print(f"Probabilities: {result['probabilities']}")
    print("-" * 50)

print("\n✅ Model training and saving complete!")
print("\nSaved files:")
print("1. twitter_sentiment_model.keras - The trained model")
print("2. twitter_sentiment_tokenizer.pickle - Tokenizer for text processing")
print("3. twitter_sentiment_config.pickle - Model configuration")
print("\nUse the SentimentPredictor class to load and use the model in your application!")


TESTING SAVED MODEL
✅ Model loaded and ready for predictions!

Sample Predictions:
--------------------------------------------------
Tweet: This is the best day ever! So happy!
Sentiment: Positive (Confidence: 99.94%)
Probabilities: {'Negative': 0.0005109712365083396, 'Neutral': 4.321126834838651e-05, 'Positive': 0.9994457364082336}
--------------------------------------------------
Tweet: I hate this so much, worst experience
Sentiment: Negative (Confidence: 100.00%)
Probabilities: {'Negative': 0.9999998807907104, 'Neutral': 4.1574967468704926e-08, 'Positive': 1.5816637244370213e-07}
--------------------------------------------------
Tweet: It's okay, nothing special really
Sentiment: Positive (Confidence: 88.55%)
Probabilities: {'Negative': 0.0753268375992775, 'Neutral': 0.03918066993355751, 'Positive': 0.8854925036430359}
--------------------------------------------------
Tweet: Absolutely love the new features!
Sentiment: Positive (Confidence: 95.19%)
Probabilities: {'Negative': 