In [None]:
# === 📦 Imports ===
import os
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification, TFRobertaForSequenceClassification, RobertaTokenizer
!pip install lime

from lime.lime_text import LimeTextExplainer
from sklearn.preprocessing import LabelEncoder
import joblib  # for loading XGBoost model
from keras.models import load_model  # for LSTM
import matplotlib.pyplot as plt

# === 📁 Load all models from Google Drive ===
# Every artifact below lives under /content/drive, so mount Drive in this cell
# instead of relying on a separate cell having been run by hand beforehand.
# Mounting an already-mounted Drive only prints a notice.
from google.colab import drive
drive.mount('/content/drive')

bert_path = "/content/drive/My Drive/tf_bert_sentiment_model"
roberta_path = "/content/drive/My Drive/roberta_sentiment_model_final"
lstm_path = "/content/drive/My Drive/Sentimental-Analysis/models/lstm_model.h5"
xgb_path = "/content/drive/My Drive/Sentimental-Analysis/models/xgb_model.pkl"
vectorizer_path = "/content/drive/My Drive/Sentimental-Analysis/models/tfidf_vectorizer.pkl"

# === 📥 Load Tokenizers & Models ===
# local_files_only=True: read the saved checkpoints from Drive, never the Hub.
bert_tokenizer = BertTokenizer.from_pretrained(bert_path, local_files_only=True)
bert_model = TFBertForSequenceClassification.from_pretrained(bert_path, local_files_only=True)

roberta_tokenizer = RobertaTokenizer.from_pretrained(roberta_path, local_files_only=True)
roberta_model = TFRobertaForSequenceClassification.from_pretrained(roberta_path, local_files_only=True)

lstm_model = load_model(lstm_path)
# joblib.load unpickles the files; only load artifacts you produced yourself.
xgb_model = joblib.load(xgb_path)
vectorizer = joblib.load(vectorizer_path)

# === 🏷️ Labels ===
# The encoder is not fitted here; its classes are assigned by hand.
# NOTE(review): this order must match the output-column order of all four
# models for the ensemble average to be meaningful — confirm against training.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.array(['negative', 'neutral', 'positive'])
class_names = list(label_encoder.classes_)

# === 🔍 Individual Model Prediction Functions ===
def bert_predict(texts, batch_size=32):
    """Return BERT class probabilities for ``texts``.

    The texts are tokenized (padded per batch, truncated to 128 tokens) and
    run through ``bert_model`` in chunks of ``batch_size`` instead of one
    forward pass. LIME hands the classifier all of its perturbed samples in a
    single call, which is far too many for one transformer batch.

    Args:
        texts: A sequence of strings, or a single string (treated as one text).
        batch_size: Maximum number of texts per forward pass; must be >= 1.

    Returns:
        A numpy array with one row per text; each row is the softmax of the
        model's logits over the last axis.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    # list() on a bare string would split it into characters, so wrap it.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        # The tokenizer rejects an empty batch; return an empty result instead.
        return np.empty((0, bert_model.config.num_labels), dtype=np.float32)

    chunks = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = bert_tokenizer(batch, return_tensors="tf", padding=True, truncation=True, max_length=128)
        outputs = bert_model(inputs)
        chunks.append(tf.nn.softmax(outputs.logits, axis=-1).numpy())
    return np.concatenate(chunks, axis=0)

def roberta_predict(texts, batch_size=32):
    """Return RoBERTa class probabilities for ``texts``.

    The texts are tokenized (padded per batch, truncated to 128 tokens) and
    run through ``roberta_model`` in chunks of ``batch_size`` instead of one
    forward pass. LIME hands the classifier all of its perturbed samples in a
    single call, which is far too many for one transformer batch.

    Args:
        texts: A sequence of strings, or a single string (treated as one text).
        batch_size: Maximum number of texts per forward pass; must be >= 1.

    Returns:
        A numpy array with one row per text; each row is the softmax of the
        model's logits over the last axis.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    # list() on a bare string would split it into characters, so wrap it.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        # The tokenizer rejects an empty batch; return an empty result instead.
        return np.empty((0, roberta_model.config.num_labels), dtype=np.float32)

    chunks = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = roberta_tokenizer(batch, return_tensors="tf", padding=True, truncation=True, max_length=128)
        outputs = roberta_model(inputs)
        chunks.append(tf.nn.softmax(outputs.logits, axis=-1).numpy())
    return np.concatenate(chunks, axis=0)

def lstm_predict(texts, tokenizer=None):
    """Return the LSTM model's predictions for ``texts``.

    Args:
        texts: A sequence of strings, or a single string (treated as one text).
        tokenizer: An already-fitted Keras ``Tokenizer`` used to map words to
            integer indices. Pass the tokenizer that was fitted when the LSTM
            was trained. If omitted, a new ``Tokenizer(num_words=5000)`` is
            fitted on ``texts`` themselves (the previous behaviour) and a
            ``RuntimeWarning`` is emitted.

    Returns:
        Whatever ``lstm_model.predict`` returns for the padded sequences
        (sequences are padded/truncated to length 100).
    """
    from tensorflow.keras.preprocessing.text import Tokenizer
    from tensorflow.keras.preprocessing.sequence import pad_sequences

    # list() on a bare string would split it into characters, so wrap it.
    texts = [texts] if isinstance(texts, str) else list(texts)

    if tokenizer is None:
        import warnings
        # A tokenizer fitted here assigns indices from the input batch alone,
        # so the same word gets different indices for different inputs and the
        # indices cannot be expected to match the model's training vocabulary.
        warnings.warn(
            "lstm_predict: no fitted tokenizer supplied; fitting a new one on "
            "the input texts. Word indices will depend on the input batch and "
            "not on the vocabulary the LSTM was trained with.",
            RuntimeWarning,
            stacklevel=2,
        )
        tokenizer = Tokenizer(num_words=5000)
        tokenizer.fit_on_texts(texts)

    sequences = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(sequences, maxlen=100)
    return lstm_model.predict(padded)

def xgb_predict(texts):
    """Vectorize ``texts`` with the loaded vectorizer and return the
    XGBoost model's ``predict_proba`` output for them."""
    return xgb_model.predict_proba(vectorizer.transform(texts))

# === 🤝 Ensemble Prediction Function ===
def ensemble_predict_proba(texts):
    """Average the four models' probability arrays with equal weight.

    Each member model is called once on ``texts`` (BERT, RoBERTa, LSTM, then
    XGBoost); the results are summed element-wise and divided by 4.
    """
    members = (bert_predict, roberta_predict, lstm_predict, xgb_predict)
    per_model = [predict(texts) for predict in members]

    # Accumulate left to right, in member order.
    total = per_model[0]
    for model_probs in per_model[1:]:
        total = total + model_probs
    return total / 4.0

# === 📊 Plotting Function for Class Probabilities ===
def plot_probabilities(probabilities, classes):
    """Show a small bar chart of one probability per class.

    Each bar is annotated with its value (two decimals) just above the bar,
    and the y-axis is fixed to the range 0..1.
    """
    _, ax = plt.subplots(figsize=(5, 2))
    rects = ax.bar(classes, probabilities, alpha=0.7)
    ax.set_ylim(0, 1)

    # Centre each label horizontally over its bar, slightly above the top.
    for rect, value in zip(rects, probabilities):
        x_centre = rect.get_x() + rect.get_width()/2
        ax.text(x_centre, value + 0.02, f'{value:.2f}', ha='center', fontsize=10)

    ax.set_ylabel("Probability")
    ax.set_title("Predicted Class Probabilities")
    ax.grid(axis='y', linestyle='--', alpha=0.5)
    plt.show()

# === 🧠 LIME Setup ===
# LIME builds each explanation from randomly perturbed copies of the text, so
# pin the random state to make the explanations repeatable between runs.
explainer = LimeTextExplainer(class_names=class_names, random_state=42)

# === 📝 Sample Tweets for Explanation ===
sample_tweets = [
    "The flight was delayed and customer service was terrible",
    "I love the new seats and the food was excellent!",
    "The check-in process was okay, nothing special.",
    "Worst experience ever. I will never fly with them again.",
    "Very smooth journey and friendly crew members."
]

# === 🔍 Loop Through Tweets and Explain with LIME ===
# For every sample tweet: print the ensemble prediction, chart the class
# probabilities, then render a LIME explanation for the top predicted label.
for i, tweet in enumerate(sample_tweets, start=1):
    print(f"\n{'='*40}\n📝 Example {i}\nTweet: {tweet}")

    # The ensemble takes a batch; send a batch of one and keep its only row.
    proba = ensemble_predict_proba([tweet])[0]
    predicted_label = label_encoder.classes_[proba.argmax()]

    print(f"✅ Predicted Sentiment (Ensemble): {predicted_label}")
    print(f"🔢 Class Probabilities: { {name: p for name, p in zip(class_names, proba.round(3))} }")

    # 📊 Bar chart of the class probabilities
    plot_probabilities(proba, class_names)

    # 🧠 LIME explanation: six most influential words for the top label
    exp = explainer.explain_instance(
        tweet,
        ensemble_predict_proba,
        num_features=6,
        top_labels=1,
    )
    exp.show_in_notebook(text=True)


In [None]:
import os

from transformers import RobertaTokenizer, TFRobertaForSequenceClassification

save_path = "/content/drive/My Drive/roberta_sentiment_model_final"

# Set to True only if you really want to replace whatever is saved at save_path.
overwrite_existing = False

# Load pretrained RoBERTa
# NOTE(review): "roberta-base" is the generic pretrained checkpoint, so the
# 3-label classification head created here is freshly initialised, not
# fine-tuned for sentiment. Saving it to save_path would replace any
# fine-tuned model stored there — hence the guard below.
roberta_tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
roberta_model = TFRobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=3)

# Save to Drive, but never silently clobber an existing saved model.
already_saved = os.path.isfile(os.path.join(save_path, "config.json"))
if already_saved and not overwrite_existing:
    print(f"A saved model already exists at {save_path}; not overwriting it. "
          "Set overwrite_existing = True to replace it.")
else:
    roberta_tokenizer.save_pretrained(save_path)
    roberta_model.save_pretrained(save_path)


In [None]:
# Mount Google Drive at /content/drive (requires the Colab runtime).
# NOTE(review): other cells in this notebook read and write paths under
# /content/drive, so on a fresh kernel this needs to run before they do.
from google.colab import drive
drive.mount('/content/drive')
