In [None]:
!pip install tensorflow pandas scikit-learn



In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [None]:
class ScalableSentimentAnalyzer:
    def __init__(self, max_features=10000, max_len=100):  # Reduced max_len
        self.max_features = max_features
        self.max_len = max_len
        self.tokenizer = Tokenizer(num_words=max_features, oov_token='<OOV>')

    def preprocess_text(self, text):
        # Simplified preprocessing for speed
        return ' '.join(str(text).lower().split())

    def load_data_in_chunks(self, filepath, chunksize=250000):  # Increased chunksize
        print("Loading data in chunks...")
        texts, labels = [], []

        # Use generator expression for memory efficiency
        chunks = pd.read_csv(filepath,
                           encoding='latin-1',
                           chunksize=chunksize,
                           header=None,
                           usecols=[0, 5])  # Only load needed columns

        for chunk in chunks:
            chunk_texts = chunk.iloc[:, 1].apply(self.preprocess_text)
            chunk_labels = chunk.iloc[:, 0].map({0: 0, 4: 1})

            texts.extend(chunk_texts)
            labels.extend(chunk_labels)

            print(f"Processed {len(texts)} samples...")

        return np.array(texts), np.array(labels)  # Convert to numpy arrays

    def prepare_data(self, texts, labels):
        # Use smaller test size
        X_train, X_test, y_train, y_test = train_test_split(
            texts, labels, test_size=0.1, random_state=42
        )

        # Fit tokenizer on a subset of training data
        self.tokenizer.fit_on_texts(X_train[:500000])

        # Process in batches
        def tokenize_and_pad(texts):
            sequences = self.tokenizer.texts_to_sequences(texts)
            return pad_sequences(sequences, maxlen=self.max_len)

        X_train_pad = tokenize_and_pad(X_train)
        X_test_pad = tokenize_and_pad(X_test)

        return X_train_pad, X_test_pad, y_train, y_test

    def create_model(self):
        model = Sequential([
            Embedding(self.max_features, 64, input_length=self.max_len),  # Reduced embedding dim
            Bidirectional(LSTM(64, return_sequences=True)),  # Added bidirectional LSTM
            Bidirectional(LSTM(32)),  # Second LSTM layer
            Dense(32, activation='relu'),
            Dropout(0.3),  # Reduced dropout
            Dense(1, activation='sigmoid')
        ])

        # Use mixed precision
        tf.keras.mixed_precision.set_global_policy('mixed_float16')

        optimizer = Adam(learning_rate=0.001)
        optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)

        model.compile(
            optimizer=optimizer,
            loss='binary_crossentropy',
            metrics=['accuracy']
        )

        return model

    def train(self, filepath):
        # Enable memory growth for GPU
        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)

        # Load and prepare data
        texts, labels = self.load_data_in_chunks(filepath)
        X_train, X_test, y_train, y_test = self.prepare_data(texts, labels)

        # Create model
        model = self.create_model()

        # Use more efficient callbacks
        checkpoint = tf.keras.callbacks.ModelCheckpoint(
            '/content/drive/My Drive/Sentiment_Analysis/Models/best_model.h5',
            save_best_only=True,
            monitor='val_accuracy'
        )

        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=2,  # Reduced patience
            restore_best_weights=True
        )

        # Train with larger batch size and reduced epochs
        history = model.fit(
            X_train, y_train,
            validation_split=0.1,  # Reduced validation split
            epochs=5,  # Reduced epochs
            batch_size=512,  # Increased batch size
            callbacks=[checkpoint, early_stopping]
        )

        # Evaluate
        test_loss, test_accuracy = model.evaluate(X_test, y_test)
        print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

        return model

    def predict(self, model, text):
        processed_text = self.preprocess_text(text)
        seq = self.tokenizer.texts_to_sequences([processed_text])
        padded_seq = pad_sequences(seq, maxlen=self.max_len)

        prediction = model.predict(padded_seq, verbose=0)[0][0]
        sentiment = 'Positive' if prediction >= 0.5 else 'Negative'
        confidence = prediction * 100 if prediction >= 0.5 else (1-prediction) * 100

        return sentiment, confidence

In [None]:
def main():
    filepath = '/content/drive/My Drive/Sentiment_Analysis/Dataset/training.1600000.processed.noemoticon.csv'
    analyzer = ScalableSentimentAnalyzer()
    model = analyzer.train(filepath)

    test_tweets = [
        "I absolutely love this product!",
        "This is the worst experience ever.",
        "I'm not sure how to feel about this."
    ]

    for tweet in test_tweets:
        sentiment, confidence = analyzer.predict(model, tweet)
        print(f"Tweet: {tweet}")
        print(f"Sentiment: {sentiment}")
        print(f"Confidence: {confidence:.2f}%\n")

if __name__ == '__main__':
    main()

Loading data in chunks...
Processed 250000 samples...
Processed 500000 samples...
Processed 750000 samples...
Processed 1000000 samples...
Processed 1250000 samples...
Processed 1500000 samples...
Processed 1600000 samples...




Epoch 1/5
[1m2532/2532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.7757 - loss: 0.4655



[1m2532/2532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 45ms/step - accuracy: 0.7757 - loss: 0.4655 - val_accuracy: 0.8182 - val_loss: 0.3989
Epoch 2/5
[1m2531/2532[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 43ms/step - accuracy: 0.8237 - loss: 0.3912



[1m2532/2532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 46ms/step - accuracy: 0.8237 - loss: 0.3912 - val_accuracy: 0.8242 - val_loss: 0.3879
Epoch 3/5
[1m2532/2532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.8332 - loss: 0.3731



[1m2532/2532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 45ms/step - accuracy: 0.8332 - loss: 0.3731 - val_accuracy: 0.8257 - val_loss: 0.3830
Epoch 4/5
[1m2531/2532[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 43ms/step - accuracy: 0.8413 - loss: 0.3573



[1m2532/2532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 46ms/step - accuracy: 0.8413 - loss: 0.3573 - val_accuracy: 0.8284 - val_loss: 0.3814
Epoch 5/5
[1m2532/2532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 46ms/step - accuracy: 0.8483 - loss: 0.3438 - val_accuracy: 0.8270 - val_loss: 0.3884
[1m5000/5000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 10ms/step - accuracy: 0.8290 - loss: 0.3787
Test Accuracy: 83.00%
Tweet: I absolutely love this product!
Sentiment: Positive
Confidence: 98.84%

Tweet: This is the worst experience ever.
Sentiment: Negative
Confidence: 99.50%

Tweet: I'm not sure how to feel about this.
Sentiment: Negative
Confidence: 96.11%



In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
import pandas as pd

class SentimentTester:
    def __init__(self, model_path, max_features=10000, max_len=100):
        # Load the trained model
        self.model = tf.keras.models.load_model(model_path)
        self.max_len = max_len

        # Initialize tokenizer
        self.tokenizer = Tokenizer(num_words=max_features, oov_token='<OOV>')

        # Create and fit tokenizer with sample data
        sample_texts = [
            "this is a positive review very good excellent amazing",
            "this is a negative review bad terrible horrible",
            "neutral review okay average normal",
            "great product highly recommend would buy again",
            "worst experience ever do not recommend",
            "decent quality but expensive price tag",
            "outstanding service exceptional quality perfect",
            "poor customer service waste of money",
            "mediocre performance not impressed",
            "absolutely fantastic exceeded expectations",
            # Add more sample texts covering various sentiments and vocabulary
            "love this product amazing features",
            "hate everything about this terrible",
            "not bad but could be better average",
            "best purchase ever made wonderful",
            "complete disaster avoid at all costs",
            "somewhat satisfied with the results",
            "incredible performance outstanding quality",
            "disappointing experience would not recommend",
            "mixed feelings about this product",
            "exceeded all expectations phenomenal"
        ]

        # Fit the tokenizer with sample texts
        self.tokenizer.fit_on_texts(sample_texts)

    def preprocess_text(self, text):
        # Basic preprocessing
        text = str(text).lower()
        text = ' '.join(text.split())
        return text

    def predict_sentiment(self, text):
        # Preprocess the text
        processed_text = self.preprocess_text(text)

        # Convert to sequence and pad
        seq = self.tokenizer.texts_to_sequences([processed_text])
        padded_seq = pad_sequences(seq, maxlen=self.max_len)

        # Make prediction
        prediction = float(self.model.predict(padded_seq, verbose=0)[0][0])

        # Get sentiment and confidence
        sentiment = 'Positive' if prediction >= 0.5 else 'Negative'
        confidence = prediction * 100 if prediction >= 0.5 else (1-prediction) * 100

        return {
            'text': text,
            'sentiment': sentiment,
            'confidence': confidence,
            'raw_score': prediction
        }

def main():
    # Replace with your model path
    MODEL_PATH = '/content/drive/My Drive/Sentiment_Analysis/Models/best_model1.h5'

    # Initialize tester
    try:
        tester = SentimentTester(MODEL_PATH)
        print("Model and tokenizer initialized successfully!")
    except Exception as e:
        print(f"Error initializing tester: {str(e)}")
        return

    print("\n=== Interactive Testing Mode ===")
    print("Enter your messages (type 'quit' to exit):\n")

    while True:
        try:
            user_input = input("Enter text to analyze: ")
            if user_input.lower() == 'quit':
                break

            result = tester.predict_sentiment(user_input)
            print(f"\nText: {result['text']}")
            print(f"Sentiment: {result['sentiment']}")
            print(f"Confidence: {result['confidence']:.2f}%")
            print("-" * 50)

        except Exception as e:
            print(f"Error during prediction: {str(e)}")
            continue

if __name__ == "__main__":
    main()



Model and tokenizer initialized successfully!

=== Interactive Testing Mode ===
Enter your messages (type 'quit' to exit):

Enter text to analyze: i want to die 

Text: i want to die 
Sentiment: Positive
Confidence: 51.51%
--------------------------------------------------
Enter text to analyze: i like her 

Text: i like her 
Sentiment: Positive
Confidence: 52.05%
--------------------------------------------------
Enter text to analyze: hi want to kill someone 

Text: hi want to kill someone 
Sentiment: Positive
Confidence: 50.78%
--------------------------------------------------


KeyboardInterrupt: Interrupted by user

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional, Conv1D, GlobalMaxPooling1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import re
import string

class ScalableSentimentAnalyzer:
    def __init__(self, max_features=50000, max_len=150):  # Increased features and length
        self.max_features = max_features
        self.max_len = max_len
        self.tokenizer = Tokenizer(num_words=max_features, oov_token='<OOV>')

    def preprocess_text(self, text):
        """Enhanced text preprocessing"""
        # Convert to lowercase
        text = str(text).lower()

        # Remove URLs
        text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)

        # Remove user mentions
        text = re.sub(r'@\w+', '', text)

        # Remove hashtags but keep the text
        text = re.sub(r'#(\w+)', r'\1', text)

        # Remove punctuation
        text = text.translate(str.maketrans('', '', string.punctuation))

        # Remove extra whitespace
        text = ' '.join(text.split())

        return text

    def load_data_in_chunks(self, filepath, chunksize=250000):
        print("Loading data in chunks...")
        texts, labels = [], []

        chunks = pd.read_csv(filepath,
                           encoding='latin-1',
                           chunksize=chunksize,
                           header=None,
                           usecols=[0, 5])

        for chunk in chunks:
            chunk_texts = chunk.iloc[:, 1].apply(self.preprocess_text)
            chunk_labels = chunk.iloc[:, 0].map({0: 0, 4: 1})

            texts.extend(chunk_texts)
            labels.extend(chunk_labels)

            print(f"Processed {len(texts)} samples...")

        return np.array(texts), np.array(labels)

    def prepare_data(self, texts, labels):
        # Stratified split to maintain class distribution
        X_train, X_test, y_train, y_test = train_test_split(
            texts, labels, test_size=0.15, random_state=42, stratify=labels
        )

        # Fit tokenizer on full training data
        print("Fitting tokenizer on training data...")
        self.tokenizer.fit_on_texts(X_train)

        def tokenize_and_pad(texts):
            sequences = self.tokenizer.texts_to_sequences(texts)
            return pad_sequences(sequences, maxlen=self.max_len, padding='post', truncating='post')

        X_train_pad = tokenize_and_pad(X_train)
        X_test_pad = tokenize_and_pad(X_test)

        return X_train_pad, X_test_pad, y_train, y_test

    def create_model(self):
        model = Sequential([
            # Embedding layer
            Embedding(self.max_features, 128, input_length=self.max_len),

            # CNN layers for feature extraction
            Conv1D(64, 5, activation='relu'),
            Conv1D(64, 4, activation='relu'),
            Conv1D(64, 3, activation='relu'),

            # Bidirectional LSTM layers
            Bidirectional(LSTM(64, return_sequences=True)),
            Dropout(0.2),
            Bidirectional(LSTM(32, return_sequences=True)),
            Dropout(0.2),
            Bidirectional(LSTM(16)),

            # Dense layers
            Dense(64, activation='relu'),
            Dropout(0.3),
            Dense(32, activation='relu'),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])

        # Use mixed precision
        tf.keras.mixed_precision.set_global_policy('mixed_float16')

        optimizer = Adam(learning_rate=0.0005)  # Reduced learning rate
        optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)

        model.compile(
            optimizer=optimizer,
            loss='binary_crossentropy',
            metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
        )

        return model

    def train(self, filepath):
        # GPU memory growth
        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)

        # Load and prepare data
        texts, labels = self.load_data_in_chunks(filepath)
        X_train, X_test, y_train, y_test = self.prepare_data(texts, labels)

        # Create model
        model = self.create_model()

        # Callbacks
        checkpoint = tf.keras.callbacks.ModelCheckpoint(
            '/content/drive/My Drive/Sentiment_Analysis/Models/best_model1.h5',
            save_best_only=True,
            monitor='val_accuracy'
        )

        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=3,
            restore_best_weights=True
        )

        reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.2,
            patience=2,
            min_lr=0.00001
        )

        # Train
        history = model.fit(
            X_train, y_train,
            validation_split=0.15,
            epochs=10,
            batch_size=256,
            callbacks=[checkpoint, early_stopping, reduce_lr]
        )

        # Evaluate
        test_metrics = model.evaluate(X_test, y_test)
        print("\nTest Results:")
        for metric, value in zip(model.metrics_names, test_metrics):
            print(f"{metric}: {value:.4f}")

        return model, history

    def predict(self, model, text):
        processed_text = self.preprocess_text(text)
        seq = self.tokenizer.texts_to_sequences([processed_text])
        padded_seq = pad_sequences(seq, maxlen=self.max_len, padding='post', truncating='post')

        prediction = float(model.predict(padded_seq, verbose=0)[0][0])
        sentiment = 'Positive' if prediction >= 0.5 else 'Negative'
        confidence = prediction * 100 if prediction >= 0.5 else (1-prediction) * 100

        return sentiment, confidence

def main():
    filepath = '/content/drive/My Drive/Sentiment_Analysis/Dataset/training.1600000.processed.noemoticon.csv'
    analyzer = ScalableSentimentAnalyzer()
    model, history = analyzer.train(filepath)

    # Save tokenizer
    import pickle
    with open('/content/drive/My Drive/Sentiment_Analysis/Models/tokenizer.pickle', 'wb') as handle:
        pickle.dump(analyzer.tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

    test_tweets = [
        "I absolutely love this product! The quality is amazing!",
        "This is the worst experience ever. Total waste of money.",
        "The product is okay, not great but not terrible either.",
        "Incredible service and lightning fast delivery!",
        "Completely disappointed with the quality. Would not recommend."
    ]

    print("\nTesting Model Predictions:")
    for tweet in test_tweets:
        sentiment, confidence = analyzer.predict(model, tweet)
        print(f"\nTweet: {tweet}")
        print(f"Sentiment: {sentiment}")
        print(f"Confidence: {confidence:.2f}%")

if __name__ == '__main__':
    main()

Loading data in chunks...
Processed 250000 samples...
Processed 500000 samples...
Processed 750000 samples...
Processed 1000000 samples...
Processed 1250000 samples...
Processed 1500000 samples...
Processed 1600000 samples...
Fitting tokenizer on training data...




Epoch 1/10
[1m4516/4516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.7585 - auc: 0.8383 - loss: 0.4868 - precision: 0.7627 - recall: 0.7508



[1m4516/4516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 61ms/step - accuracy: 0.7585 - auc: 0.8383 - loss: 0.4868 - precision: 0.7627 - recall: 0.7508 - val_accuracy: 0.8111 - val_auc: 0.8953 - val_loss: 0.4093 - val_precision: 0.8085 - val_recall: 0.8155 - learning_rate: 5.0000e-04
Epoch 2/10
[1m4515/4516[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 55ms/step - accuracy: 0.8231 - auc: 0.9035 - loss: 0.3937 - precision: 0.8247 - recall: 0.8213



[1m4516/4516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 60ms/step - accuracy: 0.8231 - auc: 0.9035 - loss: 0.3937 - precision: 0.8247 - recall: 0.8213 - val_accuracy: 0.8181 - val_auc: 0.9013 - val_loss: 0.3969 - val_precision: 0.8279 - val_recall: 0.8031 - learning_rate: 5.0000e-04
Epoch 3/10
[1m4515/4516[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 55ms/step - accuracy: 0.8421 - auc: 0.9202 - loss: 0.3596 - precision: 0.8448 - recall: 0.8386



[1m4516/4516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 60ms/step - accuracy: 0.8421 - auc: 0.9202 - loss: 0.3596 - precision: 0.8448 - recall: 0.8386 - val_accuracy: 0.8184 - val_auc: 0.9014 - val_loss: 0.3979 - val_precision: 0.8295 - val_recall: 0.8015 - learning_rate: 5.0000e-04
Epoch 4/10
[1m4516/4516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 60ms/step - accuracy: 0.8599 - auc: 0.9350 - loss: 0.3259 - precision: 0.8640 - recall: 0.8543 - val_accuracy: 0.8142 - val_auc: 0.8969 - val_loss: 0.4128 - val_precision: 0.8214 - val_recall: 0.8029 - learning_rate: 5.0000e-04
Epoch 5/10
[1m4516/4516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 59ms/step - accuracy: 0.8877 - auc: 0.9549 - loss: 0.2719 - precision: 0.8931 - recall: 0.8812 - val_accuracy: 0.8088 - val_auc: 0.8869 - val_loss: 0.4933 - val_precision: 0.8096 - va

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import numpy as np

def load_model_and_tokenizer(model_path, tokenizer_path):
    # Load the trained model
    model = tf.keras.models.load_model(model_path)

    # Load the tokenizer
    with open(tokenizer_path, 'rb') as handle:
        tokenizer = pickle.load(handle)

    return model, tokenizer

def test_sentiment(model, tokenizer, text, max_len=150):
    # Preprocess and predict
    sequence = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequence, maxlen=max_len, padding='post', truncating='post')
    prediction = float(model.predict(padded, verbose=0)[0][0])

    sentiment = 'Positive' if prediction >= 0.5 else 'Negative'
    confidence = prediction * 100 if prediction >= 0.5 else (1-prediction) * 100

    return sentiment, confidence

def main():
    # Update these paths to your model and tokenizer locations
    MODEL_PATH = '/content/drive/My Drive/Sentiment_Analysis/Models/best_model1.h5'
    TOKENIZER_PATH = '/content/drive/My Drive/Sentiment_Analysis/Models/tokenizer.pickle'

    # Load model and tokenizer
    model, tokenizer = load_model_and_tokenizer(MODEL_PATH, TOKENIZER_PATH)

    # Test cases organized by categories
    test_cases = {
        "Strong Positive": [
            "This product absolutely exceeded all my expectations! Best purchase ever!",
            "Outstanding customer service and lightning-fast delivery. Highly recommend!",
            "The quality is incredible and the price is unbeatable. A perfect 10/10!",
            "Love love love this! Can't imagine using anything else now.",
            "Game-changing product that solved all my problems!"
        ],

        "Moderate Positive": [
            "Pretty good product, does what it's supposed to do.",
            "Nice features and good value for money.",
            "Better than I expected, would probably buy again.",
            "Solid performance and reliable service.",
            "Good experience overall, minor issues but nothing serious."
        ],

        "Mixed/Neutral": [
            "It's okay, nothing special but not terrible either.",
            "Has some good features but also some drawbacks.",
            "Not sure how I feel about this yet, need more time.",
            "Average product for the price point.",
            "Some things I like, others could be improved."
        ],

        "Moderate Negative": [
            "Not quite what I was hoping for, somewhat disappointed.",
            "Several issues need to be addressed before I'd recommend this.",
            "Expected better quality for the price.",
            "Customer service could definitely be improved.",
            "Wouldn't buy again but it's not completely terrible."
        ],

        "Strong Negative": [
            "Terrible experience! Complete waste of money and time.",
            "Worst product I've ever used. Absolutely disappointing!",
            "Stay away from this! Nothing but problems and frustration.",
            "Horrible customer service and defective product. Avoid at all costs!",
            "Total disaster from start to finish. Never again!"
            "I want to die and don't want to live anymore"
        ],

        "Complex/Nuanced": [
            "While the product has some great features, the bugs make it unusable.",
            "Amazing quality but the price is just too high for what you get.",
            "Started great but quality decreased over time.",
            "Love the service but the product itself needs work.",
            "Good intentions but poor execution. Needs improvement."
        ],

        "Technical/Specific": [
            "The API integration works flawlessly but the documentation is lacking.",
            "CPU usage is optimized but RAM consumption is still too high.",
            "Great UI/UX design but backend performance issues persist.",
            "Excellent compatibility with legacy systems but new features are buggy.",
            "Strong security features but impacts overall system performance."
        ],
        "Strong Negative": [
    "Terrible experience! Complete waste of money and time.",
    "Worst product I've ever used. Absolutely disappointing!",
    "Stay away from this! Nothing but problems and frustration.",
    "Horrible customer service and defective product. Avoid at all costs!",
    "Total disaster from start to finish. Never again!",
    "This is the most infuriating service I've ever encountered.",
    "Absolutely dreadful quality and completely unreliable."
],
        "Extreme Emotional": [
    "I feel completely hopeless about this situation.",
    "This experience has left me feeling devastated.",
    "Nothing could make me feel worse than this product did.",
    "I've never been so disappointed and frustrated in my life."
]
    }

    # Test and display results
    print("=== Sentiment Analysis Test Results ===\n")

    for category, sentences in test_cases.items():
        print(f"\n{category.upper()}:")
        print("-" * 50)

        for text in sentences:
            sentiment, confidence = test_sentiment(model, tokenizer, text)
            print(f"\nText: {text}")
            print(f"Sentiment: {sentiment}")
            print(f"Confidence: {confidence:.2f}%")
            print("-" * 30)

if __name__ == "__main__":
    main()



=== Sentiment Analysis Test Results ===


STRONG POSITIVE:
--------------------------------------------------

Text: This product absolutely exceeded all my expectations! Best purchase ever!
Sentiment: Positive
Confidence: 77.64%
------------------------------

Text: Outstanding customer service and lightning-fast delivery. Highly recommend!
Sentiment: Positive
Confidence: 98.83%
------------------------------

Text: The quality is incredible and the price is unbeatable. A perfect 10/10!
Sentiment: Positive
Confidence: 87.60%
------------------------------

Text: Love love love this! Can't imagine using anything else now.
Sentiment: Positive
Confidence: 96.88%
------------------------------

Text: Game-changing product that solved all my problems!
Sentiment: Positive
Confidence: 89.45%
------------------------------

MODERATE POSITIVE:
--------------------------------------------------

Text: Pretty good product, does what it's supposed to do.
Sentiment: Positive
Confidence: 74.41%
---

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import numpy as np

def load_model_and_tokenizer(model_path, tokenizer_path):
    # Load the trained model
    model = tf.keras.models.load_model(model_path)

    # Load the tokenizer
    with open(tokenizer_path, 'rb') as handle:
        tokenizer = pickle.load(handle)

    return model, tokenizer

def test_sentiment(model, tokenizer, text, max_len=150):
    # Preprocess and predict
    sequence = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequence, maxlen=max_len, padding='post', truncating='post')
    prediction = float(model.predict(padded, verbose=0)[0][0])

    sentiment = 'Positive' if prediction >= 0.5 else 'Negative'
    confidence = prediction * 100 if prediction >= 0.5 else (1-prediction) * 100

    return sentiment, confidence

def main():
    # Update these paths to your model and tokenizer locations
    MODEL_PATH = '/content/drive/My Drive/Sentiment_Analysis/Models/best_model1.h5'
    TOKENIZER_PATH = '/content/drive/My Drive/Sentiment_Analysis/Models/tokenizer.pickle'

    # Load model and tokenizer
    print("Loading model and tokenizer...")
    model, tokenizer = load_model_and_tokenizer(MODEL_PATH, TOKENIZER_PATH)
    print("Model and tokenizer loaded successfully!")

    print("\n=== Interactive Sentiment Analysis ===")
    print("Type 'quit' or 'exit' to end the program")

    while True:
        # Get user input
        user_text = input("\nEnter text for sentiment analysis: ")

        # Check if the user wants to exit
        if user_text.lower() in ['quit', 'exit']:
            print("Exiting program. Goodbye!")
            break

        # Analyze the sentiment of the user input
        sentiment, confidence = test_sentiment(model, tokenizer, user_text)

        # Display the results
        print("\nResults:")
        print("-" * 30)
        print(f"Text: {user_text}")
        print(f"Sentiment: {sentiment}")
        print(f"Confidence: {confidence:.2f}%")
        print("-" * 30)

if __name__ == "__main__":
    main()

Loading model and tokenizer...




Model and tokenizer loaded successfully!

=== Interactive Sentiment Analysis ===
Type 'quit' or 'exit' to end the program

Enter text for sentiment analysis: i want to die

Results:
------------------------------
Text: i want to die
Sentiment: Negative
Confidence: 85.88%
------------------------------

Enter text for sentiment analysis: i am enjoying here 

Results:
------------------------------
Text: i am enjoying here 
Sentiment: Positive
Confidence: 99.02%
------------------------------

Enter text for sentiment analysis: i have ruined the party

Results:
------------------------------
Text: i have ruined the party
Sentiment: Negative
Confidence: 98.77%
------------------------------


KeyboardInterrupt: Interrupted by user

In [None]:
##end 