# LIME - Twitter Sentiment Explanation

In [10]:
# LIME Twitter Sentiment - Single Cell
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report
from lime.lime_text import LimeTextExplainer

# 1) Load and clean data
# header=None makes pandas read the first CSV row as data, so the four
# column names are supplied explicitly.
df = pd.read_csv(
    'twitter_training.csv',
    header=None,
    names=['tweet_id', 'entity', 'sentiment', 'tweet_content'],
)
print(f"Dataset loaded: {len(df)} rows")

# Discard rows with a missing tweet or label, then keep only the three
# sentiment classes modelled below. The trailing .copy() yields an
# independent frame so that columns can be added to it later.
df = (
    df.dropna(subset=['tweet_content', 'sentiment'])
      .loc[lambda frame: frame['sentiment'].isin(['Positive', 'Negative', 'Neutral'])]
      .copy()
)

def clean_text(text):
    """Normalise a raw tweet into lowercase, letters-and-whitespace-only text.

    Steps, in order: lowercase, strip URLs, strip @mentions, drop every
    character that is not an ASCII letter or whitespace, trim the ends.
    Hashtags keep their word because only the '#' symbol is dropped.

    Parameters
    ----------
    text : object
        The raw text; it is converted with str() first, so non-string
        input is accepted.

    Returns
    -------
    str
        The cleaned text. Interior whitespace is left as-is, so a removed
        URL or mention can leave a double space behind.
    """
    text = str(text).lower()
    # The word boundary and the literal "www." keep the URL pattern from
    # eating ordinary words that merely contain "www" (e.g. "awwww").
    text = re.sub(r'\bhttp\S+|\bwww\.\S+', '', text)
    text = re.sub(r'@\w+', '', text)
    # Removes digits, punctuation and symbols, including the '#' of hashtags.
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text.strip()

# Normalise every tweet, then drop rows whose cleaned text is 5 characters
# or shorter.
df['cleaned'] = df['tweet_content'].apply(clean_text)
df = df.loc[df['cleaned'].str.len().gt(5)]

# Sample for speed: at most 3000 rows, with a fixed seed
df_sample = df.sample(n=min(len(df), 3000), random_state=42)
print(f"Using {len(df_sample)} samples")
print(f"Sentiment distribution:\n{df_sample['sentiment'].value_counts()}\n")

# 2) Prepare data
texts, labels = df_sample['cleaned'].values, df_sample['sentiment'].values
# Sorted so the class order is deterministic.
class_names = sorted(df_sample['sentiment'].unique())

# 80/20 split, stratified on the label so both parts keep the class balance.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)

# 3) Train model
# TF-IDF over unigrams and bigrams (capped at 2000 features, English stop
# words removed) feeding a logistic-regression classifier. fit() returns the
# fitted pipeline, so it can be chained onto the constructor call.
model = make_pipeline(
    TfidfVectorizer(
        stop_words='english',
        ngram_range=(1, 2),
        max_features=2000,
    ),
    LogisticRegression(random_state=42, max_iter=500),
).fit(X_train, y_train)

# 4) Evaluate on the held-out split
y_pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\nModel Accuracy: {acc*100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# 5) Create LIME explainer
print("\n")
print("LIME EXPLANATIONS")

# predict_proba columns follow model.classes_, while LIME names them by
# position from class_names, so the two orderings must agree.
assert list(model.classes_) == list(class_names), (
    "class_names must be in the same order as model.classes_"
)

# LIME explains by randomly perturbing the text; seeding the explainer makes
# the reported weights repeatable across runs.
explainer = LimeTextExplainer(class_names=class_names, random_state=42)

# 6) Explain 3 test examples
test_indices = [0, 5, 10]
for idx in test_indices:
    if idx >= len(X_test):
        continue
    text = X_test[idx]
    true_label = y_test[idx]

    # top_labels=1 makes LIME explain the model's most probable class.
    # Without it only label index 1 is explained, whatever the prediction is.
    exp = explainer.explain_instance(
        text, model.predict_proba, num_features=8, top_labels=1
    )
    pred_idx = int(exp.top_labels[0])
    pred_label = class_names[pred_idx]

    # Only add an ellipsis when the text was actually cut.
    snippet = text if len(text) <= 100 else f"{text[:100]}..."

    print(f"\nExample {idx+1}:")
    print(f"Text: {snippet}")
    print(f"True: {true_label} | Predicted: {pred_label}")
    # Weights are relative to the predicted class: a positive weight pushes
    # the model towards it, a negative weight pushes away from it.
    print("Top influential words:")
    for word, weight in exp.as_list(label=pred_idx):
        marker = "✓" if weight > 0 else "✗"
        print(f"  {marker} {word}: {weight:+.3f}")

# 7) Custom explanation function
def explain_custom_text(text, num_features=10, show_features=False):
    """Predict the sentiment of a raw text and explain the prediction with LIME.

    The text is passed through clean_text() before it is handed to the
    notebook-level `explainer` and `model`; the raw text is what gets printed.

    Parameters
    ----------
    text : str
        Raw, uncleaned text to classify.
    num_features : int, default 10
        Maximum number of words LIME reports per class.
    show_features : bool, default False
        When True, also print the word weights for the predicted class.

    Returns
    -------
    The LIME explanation object. It covers every class in `class_names`, so
    `as_list(label=i)` is valid for any class index, including the predicted one.

    Raises
    ------
    ValueError
        If nothing is left of `text` after cleaning.
    """
    cleaned = clean_text(text)
    if not cleaned:
        # LIME perturbs the words of the text; with no words left it fails
        # with an unrelated-looking error, so stop early with a clear one.
        raise ValueError(
            "Nothing left to explain: the text contains no letters after cleaning."
        )

    # Explain all classes. By default LIME only explains label index 1, which
    # is not necessarily the class the model predicted.
    exp = explainer.explain_instance(
        cleaned,
        model.predict_proba,
        num_features=num_features,
        top_labels=len(class_names),
    )
    pred_idx = int(exp.predict_proba.argmax())
    pred_class = class_names[pred_idx]
    prob = exp.predict_proba[pred_idx]

    print("\n")
    print(f"Text: {text}")
    print(f"Predicted: {pred_class} (Confidence: {prob*100:.1f}%)")
    if show_features:
        # Positive weights push towards the predicted class, negative away.
        print("\nTop features contributing to prediction:")
        for word, weight in exp.as_list(label=pred_idx):
            marker = "✓" if weight > 0 else "✗"
            print(f"  {marker} '{word}': {weight:+.3f}")
    return exp

# Blank lines followed by the final status message, emitted in a single call.
print("\n", "Model ready!", sep="\n")


Dataset loaded: 74682 rows
Using 3000 samples
Sentiment distribution:
sentiment
Negative    1066
Positive    1014
Neutral      920
Name: count, dtype: int64

Using 3000 samples
Sentiment distribution:
sentiment
Negative    1066
Positive    1014
Neutral      920
Name: count, dtype: int64


Model Accuracy: 58.50%

Classification Report:
              precision    recall  f1-score   support

    Negative       0.58      0.65      0.62       213
     Neutral       0.58      0.49      0.53       184
    Positive       0.60      0.60      0.60       203

    accuracy                           0.58       600
   macro avg       0.58      0.58      0.58       600
weighted avg       0.58      0.58      0.58       600



LIME EXPLANATIONS

Model Accuracy: 58.50%

Classification Report:
              precision    recall  f1-score   support

    Negative       0.58      0.65      0.62       213
     Neutral       0.58      0.49      0.53       184
    Positive       0.60      0.60      0.60       2

## Test Your Own Text

In [12]:
# Replace this sentence with any text you want to classify.
my_text = "This product is absolutely amazing! I love it so much."

# Prints the predicted sentiment with its confidence; the returned LIME
# explanation is kept for further inspection.
explanation = explain_custom_text(my_text)



Text: This product is absolutely amazing! I love it so much.
Predicted: Positive (Confidence: 88.6%)
