<a href="https://colab.research.google.com/github/Ritikseptember2003/Frayit_MVP_ML_MODEL/blob/main/Enhanced_toxicity_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Run this in your first Colab cell
# Installs the third-party packages used by the later cells (no versions are pinned here).
!pip install pandas numpy scikit-learn xgboost matplotlib seaborn nltk flask joblib requests
# Second pass upgrades scikit-learn to the newest release the package index offers.
!pip install --upgrade scikit-learn

# Download NLTK data
# Fetches the 'stopwords' and 'punkt' resources; quiet=True suppresses the download log.
import nltk
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)



True

In [2]:
from google.colab import files
import zipfile
import os

# Ask the user for the Kaggle archive; files.upload() returns a mapping keyed by file name.
print("Please upload the jigsaw-toxic-comment-classification-challenge.zip file")
uploaded = files.upload()

# Unpack every uploaded archive into jigsaw_data/
for filename in uploaded:
    print(f"Extracting {filename}...")
    outer_archive = zipfile.ZipFile(filename, 'r')
    with outer_archive as zip_ref:
        zip_ref.extractall('jigsaw_data/')

    # The Kaggle bundle ships train.csv as a nested zip; unpack it in place when present
    if not os.path.exists('jigsaw_data/train.csv.zip'):
        continue
    with zipfile.ZipFile('jigsaw_data/train.csv.zip', 'r') as zip_ref:
        zip_ref.extractall('jigsaw_data/')

print("✅ Dataset uploaded and extracted!")

Please upload the jigsaw-toxic-comment-classification-challenge.zip file


Saving jigsaw-toxic-comment-classification-challenge.zip to jigsaw-toxic-comment-classification-challenge.zip
Extracting jigsaw-toxic-comment-classification-challenge.zip...
✅ Dataset uploaded and extracted!


## Extracting the Dataset

In [3]:
import zipfile
import os
import pandas as pd

# Collect the zip archives sitting in the working directory. The listing is sorted so
# that "the first zip file" is the same one on every run (os.listdir order is arbitrary).
zip_files = sorted(f for f in os.listdir('.') if f.endswith('.zip'))
print(f"Found zip files: {zip_files}")

# Extract the first zip file found
if zip_files:
    zip_filename = zip_files[0]
    print(f"Extracting {zip_filename}...")

    with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
        zip_ref.extractall('dataset/')

    print("✅ Extraction complete!")

    # List extracted files.
    # The loop variables are deliberately NOT called `files`: that name is bound to the
    # `google.colab.files` module by the upload cell, and rebinding it here would make
    # `files.upload()` fail if that cell is run again.
    for dirpath, _dirnames, filenames in os.walk('dataset/'):
        for extracted_name in filenames:
            print(f"Found: {os.path.join(dirpath, extracted_name)}")
else:
    print("❌ No zip file found")

Found zip files: ['jigsaw-toxic-comment-classification-challenge.zip']
Extracting jigsaw-toxic-comment-classification-challenge.zip...
✅ Extraction complete!
Found: dataset/train.csv.zip
Found: dataset/test_labels.csv.zip
Found: dataset/sample_submission.csv.zip
Found: dataset/test.csv.zip


## Finding the train.csv File

In [4]:
def find_first_file(target_name, search_root='.'):
    """Return the path of the first file named ``target_name`` below ``search_root``.

    Directories are visited in ``os.walk`` order and the search stops at the first hit.

    Args:
        target_name: Exact file name to look for (e.g. ``'train.csv'``).
        search_root: Directory where the walk starts.

    Returns:
        The joined path of the first match, or ``None`` when no file matches.
    """
    # Loop variables avoid the name `files`, which the upload cell binds to google.colab.files.
    for dirpath, _dirnames, filenames in os.walk(search_root):
        if target_name in filenames:
            return os.path.join(dirpath, target_name)
    return None


# Look for train.csv file
train_csv_path = find_first_file('train.csv')

if train_csv_path:
    print(f"✅ Found train.csv at: {train_csv_path}")
else:
    # If train.csv is still zipped, extract the first archive found exactly once.
    # (The earlier nested-loop version kept walking after extraction, so it could
    # unpack several archives while iterating over the tree it was writing into.)
    train_zip_path = find_first_file('train.csv.zip')
    if train_zip_path:
        print("Found train.csv.zip, extracting...")
        with zipfile.ZipFile(train_zip_path, 'r') as zip_ref:
            zip_ref.extractall('dataset/')

        # Look again, this time only inside the extraction target
        train_csv_path = find_first_file('train.csv', 'dataset/')
        if train_csv_path:
            print(f"✅ Extracted and found train.csv at: {train_csv_path}")

if train_csv_path:
    print(f"✅ Dataset ready at: {train_csv_path}")
else:
    print("❌ Could not find train.csv")

✅ Found train.csv at: ./jigsaw_data/train.csv
✅ Dataset ready at: ./jigsaw_data/train.csv


## Dataset Cross Checking

In [5]:
# Sanity-check the CSV located by the previous cell (skipped when nothing was found)
if train_csv_path:
    df = pd.read_csv(train_csv_path)

    # Basic shape / schema overview followed by a two-row preview
    print(f"Dataset shape: {df.shape}")
    print(f"Columns: {df.columns.tolist()}")
    print("\nFirst few rows:")
    print(df.head(2))

    # Which of the six expected label columns are actually present?
    toxicity_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    available_toxicity = []
    for col in toxicity_cols:
        if col in df.columns:
            available_toxicity.append(col)
    print(f"\nToxicity columns found: {available_toxicity}")

    verdict = "✅ Dataset looks good!" if available_toxicity else "❌ No toxicity columns found"
    print(verdict)

Dataset shape: (159571, 8)
Columns: ['id', 'comment_text', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

First few rows:
                 id                                       comment_text  toxic  \
0  0000997932d777bf  Explanation\nWhy the edits made under my usern...      0   
1  000103f0d9cfb60f  D'aww! He matches this background colour I'm s...      0   

   severe_toxic  obscene  threat  insult  identity_hate  
0             0        0       0       0              0  
1             0        0       0       0              0  

Toxicity columns found: ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
✅ Dataset looks good!


## Main Code

In [6]:
# pyngrok is imported next to Flask in the main cell (`from pyngrok import ngrok`);
# no version is pinned here.
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


In [13]:
# ============================================
# ENHANCED TOXICITY DETECTION MODEL WITH RL
# ============================================

import pandas as pd
import numpy as np
import joblib
import requests
import time, json, os
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, roc_auc_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt
import seaborn as sns
from collections import deque
import random
from typing import Dict, List, Tuple
import re

# Enhanced preprocessing
# NLTK is treated as optional: the import itself lives inside the guarded block so that a
# missing package, a missing corpus (LookupError) or a failed download all fall back to a
# small built-in stopword list. `except Exception` is used instead of a bare `except:` so
# KeyboardInterrupt / SystemExit are no longer swallowed.
try:
    import nltk
    nltk.download('stopwords', quiet=True)
    nltk.download('punkt', quiet=True)
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize
    STOPWORDS = set(stopwords.words('english'))
except Exception as nltk_error:
    print(f"NLTK stopwords unavailable ({type(nltk_error).__name__}); using built-in fallback list.")
    STOPWORDS = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'])

# Flask for deployment
# FLASK_AVAILABLE records whether both flask and pyngrok could be imported.
try:
    from flask import Flask, request, jsonify, render_template_string
    # pyngrok for tunneling (required for Colab)
    from pyngrok import ngrok
    FLASK_AVAILABLE = True
except ImportError:
    FLASK_AVAILABLE = False
    print("Flask not available. Install with: pip install flask pyngrok")

# ============================================
# CONFIG & ENHANCED PARAMETERS
# ============================================
# Artefact locations (all relative to the working directory)
MODEL_PATH = "enhanced_toxicity_model.pkl"   # joblib dump of the detector
RL_MODEL_PATH = "rl_toxicity_agent.pkl"      # RL agent state
REPORTS_PATH = "reports.json"
FEEDBACK_PATH = "feedback.csv"
PERFORMANCE_LOG = "performance_log.json"

# (category, severity weight, human-readable description) — order defines the key order
_CATEGORY_SPECS = (
    ('toxic', 1.0, 'General toxic behavior'),
    ('severe_toxic', 2.0, 'Severely toxic content'),
    ('obscene', 1.2, 'Obscene language'),
    ('threat', 2.5, 'Threatening behavior'),
    ('insult', 1.1, 'Insulting language'),
    ('identity_hate', 2.0, 'Identity-based hate speech'),
)

# Toxicity categories with severity weights
TOXICITY_CATEGORIES = {
    category: {'weight': weight, 'description': description}
    for category, weight, description in _CATEGORY_SPECS
}

# ============================================
# ENHANCED TEXT PREPROCESSING
# ============================================
# Apostrophe contractions are expanded as plain substrings. Order matters: the specific
# forms ("won't", "can't") must be handled before the generic "n't" suffix.
_CONTRACTION_EXPANSIONS = (
    ("won't", "will not"), ("can't", "cannot"), ("n't", " not"),
    ("'re", " are"), ("'ve", " have"), ("'ll", " will"), ("'d", " would"),
    ("'m", " am"),
)

# Chat abbreviations are only expanded when they stand alone as a word. Expanding them as
# substrings would rewrite every letter "u" in the text ("stupid" -> "styoupid").
_SLANG_EXPANSIONS = {"u": "you", "ur": "your", "ppl": "people"}
_SLANG_PATTERN = re.compile(
    r"\b(" + "|".join(re.escape(token) for token in _SLANG_EXPANSIONS) + r")\b"
)


def clean_text(text):
    """Normalise a raw comment for vectorisation.

    Steps, in order: lower-case, strip URLs and e-mail addresses, expand apostrophe
    contractions, expand the whole-word abbreviations "u"/"ur"/"ppl", collapse repeated
    ``!``/``?``/``.`` runs, and squeeze whitespace.

    Args:
        text: Raw comment. ``None``/NaN and the empty string are accepted.

    Returns:
        The cleaned, stripped string; ``''`` for missing or empty input.
    """
    if pd.isna(text) or text == '':
        return ''

    text = str(text).lower()

    # Remove URLs ("www." needs a literal dot and a word boundary so that words such as
    # "awwwesome" are left alone)
    text = re.sub(r'http\S+|\bwww\.\S+', '', text)

    # Remove email addresses
    text = re.sub(r'\S+@\S+', '', text)

    # Handle contractions
    for contraction, expansion in _CONTRACTION_EXPANSIONS:
        text = text.replace(contraction, expansion)

    # Expand chat abbreviations, whole words only
    text = _SLANG_PATTERN.sub(lambda match: _SLANG_EXPANSIONS[match.group(1)], text)

    # Remove excessive punctuation but keep some for context
    text = re.sub(r'[!]{2,}', '!', text)
    text = re.sub(r'[?]{2,}', '?', text)
    text = re.sub(r'[.]{3,}', '...', text)

    # Remove excessive whitespace
    text = re.sub(r'\s+', ' ', text)

    return text.strip()

def extract_features(text):
    """Compute simple surface statistics for a piece of text.

    Returns an empty dict for falsy input; otherwise a dict with the keys
    ``length``, ``word_count``, ``caps_ratio``, ``exclamation_count``,
    ``question_count`` and ``profanity_indicators``.
    """
    if not text:
        return {}

    n_chars = len(text)
    n_upper = len([ch for ch in text if ch.isupper()])
    # Whole-word hits from a tiny, fixed list of mild profanities (case-insensitive)
    profanity_hits = re.findall(r'\b(damn|hell|crap|stupid|idiot)\b', text.lower())

    features = {}
    features['length'] = n_chars
    features['word_count'] = len(text.split())
    features['caps_ratio'] = n_upper / max(n_chars, 1)
    features['exclamation_count'] = text.count('!')
    features['question_count'] = text.count('?')
    features['profanity_indicators'] = len(profanity_hits)
    return features

# ============================================
# REINFORCEMENT LEARNING AGENT
# ============================================
class ToxicityRLAgent:
    """Tabular, epsilon-greedy Q-learning agent over discretised text features.

    States are fixed-length tuples of small integers (see ``get_state``); the Q-table is
    a dict mapping such a tuple to a NumPy vector with one value per action.

    Args:
        state_size: Number of entries kept in a state tuple.
        action_size: Number of discrete actions (6 toxicity categories by default).
        learning_rate: Stored on the instance. NOTE(review): ``replay`` overwrites the
            Q-value with the target, so this value is currently not applied.
        gamma: Discount factor used for the bootstrapped part of the target.
    """

    def __init__(self, state_size=10, action_size=6, learning_rate=0.001, gamma=0.95):
        self.state_size = state_size
        self.action_size = action_size  # 6 toxicity categories
        self.memory = deque(maxlen=10000)  # bounded replay buffer
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995

        # Q-table for simple Q-learning (can be upgraded to neural network)
        self.q_table = {}
        self.reward_history = []

    def get_state(self, text_features, prediction_confidence):
        """Discretise features and classifier confidence into a hashable state tuple.

        Every component is bucketed and capped at 9; missing feature keys count as 0.
        The tuple is padded with zeros and truncated to ``state_size`` entries.
        """
        state = tuple([
            min(int(text_features.get('length', 0) / 10), 9),
            min(int(text_features.get('word_count', 0) / 5), 9),
            min(int(text_features.get('caps_ratio', 0) * 10), 9),
            min(text_features.get('exclamation_count', 0), 9),
            min(text_features.get('question_count', 0), 9),
            min(text_features.get('profanity_indicators', 0), 9),
            min(int(prediction_confidence * 10), 9),
            0, 0, 0  # padding for fixed state size
        ][:self.state_size])
        return state

    def act(self, state):
        """Choose an action index with an epsilon-greedy policy.

        Returns a plain Python ``int`` in ``range(action_size)``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        if state not in self.q_table:
            self.q_table[state] = np.zeros(self.action_size)

        return int(np.argmax(self.q_table[state]))

    def remember(self, state, action, reward, next_state, done):
        """Store one experience tuple in the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def calculate_reward(self, predicted_label, actual_label, user_feedback=None):
        """Return +1/-1 for a right/wrong prediction, adjusted by +/-0.5 for feedback.

        ``user_feedback`` may be ``'correct'``, ``'incorrect'`` or anything else
        (including ``None``), in which case no adjustment is made.
        """
        base_reward = 1.0 if predicted_label == actual_label else -1.0

        # Bonus for user feedback
        if user_feedback is not None:
            if user_feedback == 'correct':
                base_reward += 0.5
            elif user_feedback == 'incorrect':
                base_reward -= 0.5

        return base_reward

    def replay(self, batch_size=32):
        """Update the Q-table from a random batch of remembered experiences.

        Does nothing until at least ``batch_size`` experiences are stored. After a
        batch, epsilon decays by ``epsilon_decay`` while it is above ``epsilon_min``.
        """
        if len(self.memory) < batch_size:
            return

        batch = random.sample(self.memory, batch_size)

        for state, action, reward, next_state, done in batch:
            if state not in self.q_table:
                self.q_table[state] = np.zeros(self.action_size)
            if next_state not in self.q_table:
                self.q_table[next_state] = np.zeros(self.action_size)

            target = reward
            if not done:
                target += self.gamma * np.amax(self.q_table[next_state])

            # The Q-value is replaced by the target outright (no learning-rate blending).
            self.q_table[state][action] = target

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    @staticmethod
    def _plain_key(state):
        """Return ``state`` with NumPy scalars converted to built-in Python numbers.

        Keeps the textual key written by ``save`` parseable as a literal by ``load``.
        """
        return tuple(x.item() if isinstance(x, np.generic) else x for x in state)

    def save(self, filepath):
        """Write the Q-table, epsilon and reward history to ``filepath`` as JSON.

        State tuples are stored as their ``str()`` representation because JSON object
        keys must be strings.
        """
        agent_data = {
            'q_table': {
                str(self._plain_key(k)): np.asarray(v).tolist()
                for k, v in self.q_table.items()
            },
            'epsilon': float(self.epsilon),
            'reward_history': [float(r) for r in self.reward_history]
        }
        with open(filepath, 'w') as f:
            json.dump(agent_data, f)

    def load(self, filepath):
        """Restore state written by ``save``; a missing file is silently ignored.

        Keys are parsed with ``ast.literal_eval`` rather than ``eval`` so that a
        tampered file cannot execute code.

        Raises:
            ValueError: If a stored Q-table key is not a literal tuple.
        """
        import ast  # local import: only this method needs it

        if not os.path.exists(filepath):
            return

        with open(filepath, 'r') as f:
            agent_data = json.load(f)

        restored = {}
        for raw_key, values in agent_data['q_table'].items():
            try:
                state = ast.literal_eval(raw_key)
            except (ValueError, SyntaxError, TypeError) as exc:
                raise ValueError(
                    f"Invalid Q-table state key in {filepath!r}: {raw_key!r}"
                ) from exc
            if not isinstance(state, tuple):
                raise ValueError(
                    f"Invalid Q-table state key in {filepath!r}: {raw_key!r}"
                )
            restored[state] = np.array(values)

        self.q_table = restored
        self.epsilon = agent_data.get('epsilon', self.epsilon)
        self.reward_history = agent_data.get('reward_history', [])

# ============================================
# ENHANCED MODEL CLASS
# ============================================
class EnhancedToxicityDetector:
    """Binary toxicity classifier paired with a tabular RL agent.

    The classifier is a scikit-learn ``Pipeline`` (TF-IDF -> sparse-safe scaler ->
    soft-voting ensemble of XGBoost and logistic regression). The RL agent
    (``ToxicityRLAgent``) proposes an action per text and is updated from feedback.

    Attributes:
        pipeline: The fitted (or not yet fitted) scikit-learn pipeline, ``None`` until built.
        rl_agent: The ``ToxicityRLAgent`` instance.
        threshold_dict: Per-category thresholds (all 0.5 initially) plus, after
            ``optimize_thresholds``, a ``'binary'`` entry used for the overall decision.
        performance_history: List persisted with the model; not written to by this class.
    """

    def __init__(self):
        self.pipeline = None
        self.rl_agent = ToxicityRLAgent()
        self.threshold_dict = {category: 0.5 for category in TOXICITY_CATEGORIES.keys()}
        self.performance_history = []

    @staticmethod
    def _surface_features(text, cleaned_text):
        """Extract surface features from the RAW text.

        ``clean_text`` lower-cases the input and collapses repeated punctuation, so
        running ``extract_features`` on the cleaned string makes ``caps_ratio`` always
        0 and caps the exclamation/question counts. Text that cleans to the empty
        string still yields ``{}``.
        """
        if not cleaned_text:
            return {}
        return extract_features(str(text))

    def create_enhanced_pipeline(self):
        """Build a fresh, unfitted ensemble pipeline, store it on ``self.pipeline`` and return it."""

        # TF-IDF Vectorizer with enhanced parameters
        tfidf = TfidfVectorizer(
            max_features=100000,
            ngram_range=(1, 3),  # Increased n-gram range
            stop_words='english',
            lowercase=True,
            strip_accents='unicode',
            analyzer='word',
            min_df=2,
            max_df=0.95
        )

        # Ensemble of multiple classifiers
        xgb_clf = XGBClassifier(
            n_estimators=500,
            max_depth=8,
            learning_rate=0.05,
            subsample=0.9,
            colsample_bytree=0.9,
            reg_alpha=0.1,
            reg_lambda=0.1,
            eval_metric="logloss",
            use_label_encoder=False,
            random_state=42,
            n_jobs=-1
        )

        lr_clf = LogisticRegression(
            C=1.0,
            solver='liblinear',
            random_state=42,
            max_iter=1000
        )

        # Voting ensemble
        ensemble = VotingClassifier([
            ('xgb', xgb_clf),
            ('lr', lr_clf)
        ], voting='soft')

        self.pipeline = Pipeline([
            ('tfidf', tfidf),
            ('scaler', StandardScaler(with_mean=False)),  # For sparse matrices
            ('ensemble', ensemble)
        ])

        return self.pipeline

    def train(self, X_train, y_train, X_val=None, y_val=None):
        """Cross-validate, then fit ``self.pipeline`` on the full training data.

        Args:
            X_train: Training texts (pandas Series, list or array).
            y_train: Binary labels aligned with ``X_train``.
            X_val, y_val: Optional hold-out set; when both are given the decision
                threshold is tuned on it via ``optimize_thresholds``.
        """
        # Local import: sklearn is already a dependency of this file.
        from sklearn.base import clone

        print("🚀 Starting enhanced training...")

        if self.pipeline is None:
            self.create_enhanced_pipeline()

        # Positional Series views so that lists, arrays and Series with an arbitrary
        # index are all handled the same way by .iloc below.
        X_all = pd.Series(X_train).reset_index(drop=True)
        y_all = pd.Series(y_train).reset_index(drop=True)

        # Cross-validation for robust performance estimation
        cv_scores = []
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

        for fold, (train_indices, val_indices) in enumerate(skf.split(X_all.values, y_all.values)):
            print(f"Training fold {fold + 1}/5...")

            X_fold_train = X_all.iloc[train_indices]
            y_fold_train = y_all.iloc[train_indices]
            X_fold_val = X_all.iloc[val_indices]
            y_fold_val = y_all.iloc[val_indices]

            # Unfitted copy for this fold. Cloning (instead of calling
            # create_enhanced_pipeline) leaves self.pipeline untouched, so a pipeline
            # configured by the caller is the one that gets cross-validated and trained.
            fold_pipeline = clone(self.pipeline)
            fold_pipeline.fit(X_fold_train, y_fold_train)

            # Evaluate fold
            fold_preds = fold_pipeline.predict_proba(X_fold_val)[:, 1]
            fold_auc = roc_auc_score(y_fold_val, fold_preds)
            cv_scores.append(fold_auc)
            print(f"Fold {fold + 1} AUC: {fold_auc:.4f}")

        print(f"Mean CV AUC: {np.mean(cv_scores):.4f} (+/- {np.std(cv_scores) * 2:.4f})")

        # Train on full training set
        self.pipeline.fit(X_all, y_all)

        # Optimize thresholds if validation set provided
        if X_val is not None and y_val is not None:
            self.optimize_thresholds(X_val, y_val)

        print("✅ Training completed!")

    def optimize_thresholds(self, X_val, y_val):
        """Pick the probability threshold with the best F1 on the validation set.

        Scans 0.10 .. 0.85 in steps of 0.05, stores the winner under
        ``threshold_dict['binary']`` and returns the per-threshold metrics as a list of
        dicts with keys ``threshold``, ``f1``, ``precision`` and ``recall``.
        """
        print("🎯 Optimizing classification thresholds...")

        val_probs = self.pipeline.predict_proba(X_val)[:, 1]

        best_f1 = 0
        best_threshold = 0.5

        thresholds = np.arange(0.1, 0.9, 0.05)
        threshold_results = []

        for threshold in thresholds:
            val_preds = (val_probs >= threshold).astype(int)
            # zero_division=0 scores a threshold that predicts no positives as 0
            f1 = f1_score(y_val, val_preds, zero_division=0)
            precision = precision_score(y_val, val_preds, zero_division=0)
            recall = recall_score(y_val, val_preds, zero_division=0)

            threshold_results.append({
                'threshold': float(threshold),
                'f1': f1,
                'precision': precision,
                'recall': recall
            })

            if f1 > best_f1:
                best_f1 = f1
                best_threshold = float(threshold)

        # Update threshold for binary classification
        self.threshold_dict['binary'] = best_threshold
        print(f"✅ Best threshold: {best_threshold:.3f} with F1: {best_f1:.3f}")

        return threshold_results

    def predict_with_rl(self, text, use_rl=True):
        """Classify one text, optionally attaching the RL agent's recommendation.

        Args:
            text: Raw input text.
            use_rl: When true the result also carries ``rl_action``,
                ``final_prediction`` and ``toxicity_type``; otherwise it carries
                ``prediction`` only.

        Returns:
            A dict with the raw and cleaned text, probability, confidence
            (``|p - 0.5| * 2``), surface features and the prediction fields above.

        Raises:
            ValueError: If no pipeline has been created/trained yet.
        """
        if self.pipeline is None:
            raise ValueError("Model not trained yet!")

        # Clean the text for the classifier, but measure features on the raw text
        cleaned_text = clean_text(text)
        text_features = self._surface_features(text, cleaned_text)

        # Get base prediction
        prob = self.pipeline.predict_proba([cleaned_text])[0, 1]
        base_prediction = int(prob >= self.threshold_dict.get('binary', 0.5))

        raw_score = prob

        if use_rl:
            # Get RL agent's recommendation
            state = self.rl_agent.get_state(text_features, prob)
            rl_action = self.rl_agent.act(state)

            # Combine base prediction with RL recommendation
            # RL action 0 = not toxic, any other action = toxic
            rl_prediction = 1 if rl_action > 0 else 0

            # Weighted combination.
            # NOTE(review): with weights 0.7/0.3 and a 0.5 cut-off the result always
            # equals base_prediction, so the RL vote cannot currently flip the label.
            final_prediction = int(0.7 * base_prediction + 0.3 * rl_prediction >= 0.5)

            return {
                'text': text,
                'cleaned_text': cleaned_text,
                'probability': float(prob),
                'base_prediction': base_prediction,
                'rl_action': int(rl_action),
                'final_prediction': final_prediction,
                # NOTE(review): action k > 0 maps to the k-th key, so the first
                # category ('toxic') is never reported here — confirm intended mapping.
                'toxicity_type': list(TOXICITY_CATEGORIES.keys())[rl_action] if rl_action > 0 else 'clean',
                'confidence': float(abs(prob - 0.5) * 2),  # Confidence metric
                'features': text_features,
                'rawScore': float(raw_score)
            }
        else:
            return {
                'text': text,
                'cleaned_text': cleaned_text,
                'probability': float(prob),
                'prediction': base_prediction,
                'confidence': float(abs(prob - 0.5) * 2),
                'features': text_features,
                'rawScore': float(raw_score)
            }

    def learn_from_feedback(self, text, predicted_result, actual_label, user_feedback=None):
        """Feed one labelled outcome back into the RL agent.

        Args:
            text: The raw text that was classified.
            predicted_result: The dict returned by ``predict_with_rl(..., use_rl=True)``;
                results without ``rl_action`` are ignored.
            actual_label: Ground-truth label to compare ``final_prediction`` against.
            user_feedback: Optional ``'correct'`` / ``'incorrect'`` reward modifier.
        """
        if 'rl_action' not in predicted_result:
            return

        # Rebuild the state exactly as predict_with_rl built it (raw-text features)
        text_features = self._surface_features(text, clean_text(text))
        state = self.rl_agent.get_state(text_features, predicted_result['probability'])
        action = predicted_result['rl_action']

        # Calculate reward
        reward = self.rl_agent.calculate_reward(
            predicted_result['final_prediction'],
            actual_label,
            user_feedback
        )

        # Store experience (simplified - next_state is same as current state for this example)
        self.rl_agent.remember(state, action, reward, state, True)
        self.rl_agent.reward_history.append(reward)

        # Train RL agent
        self.rl_agent.replay()

        print(f"✅ RL agent learned from feedback. Reward: {reward:.2f}")

    def save_model(self, model_path=MODEL_PATH):
        """Persist the pipeline/thresholds with joblib and the RL agent to ``RL_MODEL_PATH``."""
        model_data = {
            'pipeline': self.pipeline,
            'threshold_dict': self.threshold_dict,
            'performance_history': self.performance_history
        }
        joblib.dump(model_data, model_path)
        self.rl_agent.save(RL_MODEL_PATH)
        print(f"✅ Model saved to {model_path} and {RL_MODEL_PATH}")

    def load_model(self, model_path=MODEL_PATH):
        """Load a model written by ``save_model``.

        Returns:
            ``True`` when ``model_path`` existed and was loaded, ``False`` otherwise.
        """
        if os.path.exists(model_path):
            # SECURITY: joblib.load unpickles the file and can execute arbitrary code;
            # only load model files from a trusted source.
            model_data = joblib.load(model_path)
            self.pipeline = model_data['pipeline']
            self.threshold_dict = model_data['threshold_dict']
            self.performance_history = model_data.get('performance_history', [])
            self.rl_agent.load(RL_MODEL_PATH)
            print(f"✅ Model loaded from {model_path}")
            return True
        return False

# ============================================
# ENHANCED DATASET PREPARATION
# ============================================
def prepare_enhanced_dataset(df_path="jigsaw_data/train.csv", sample_size=20000):
    """Load the Jigsaw CSV and return a cleaned, shuffled sample.

    A comment is labelled toxic when its weighted sum over the six category columns is
    positive. Up to ``sample_size // 3`` toxic rows are drawn and the remainder is
    filled with clean rows (fixed seed 42 throughout).

    Returns:
        DataFrame with the columns ``comment_text`` (cleaned) and ``label`` (0/1).
    """
    print("📊 Preparing enhanced dataset...")

    frame = pd.read_csv(df_path)
    print(f"Original dataset shape: {frame.shape}")

    # Weighted toxicity score accumulated over whichever label columns exist
    label_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    frame['toxicity_score'] = 0
    for column in label_columns:
        if column not in frame.columns:
            continue
        severity = TOXICITY_CATEGORIES.get(column, {}).get('weight', 1.0)
        frame['toxicity_score'] += frame[column] * severity

    # Any positive score counts as toxic
    frame['label'] = (frame['toxicity_score'] > 0).astype(int)

    # Normalise the text, then drop comments that became empty
    frame['comment_text'] = frame['comment_text'].apply(clean_text)
    has_text = frame['comment_text'].str.len() > 0
    frame = frame[has_text]

    # Split by class and sample roughly one toxic row for every two clean ones
    toxic_rows = frame[frame['label'] == 1]
    clean_rows = frame[frame['label'] == 0]
    toxic_quota = min(len(toxic_rows), sample_size // 3)
    clean_quota = min(len(clean_rows), sample_size - toxic_quota)

    sampled_parts = [
        toxic_rows.sample(toxic_quota, random_state=42),
        clean_rows.sample(clean_quota, random_state=42),
    ]
    df_balanced = pd.concat(sampled_parts)
    df_balanced = df_balanced.sample(frac=1, random_state=42)  # shuffle
    df_balanced = df_balanced.reset_index(drop=True)

    print(f"Balanced dataset shape: {df_balanced.shape}")
    print(f"Toxicity ratio: {df_balanced['label'].mean():.3f}")

    return df_balanced[['comment_text', 'label']]

def main_training_pipeline(data_path="jigsaw_data/train.csv", sample_size=20000):
    """Prepare data, train an ``EnhancedToxicityDetector`` and save it.

    Args:
        data_path: Location of the Jigsaw ``train.csv``. Defaults to the path used by
            the upload cell; pass the path found by the discovery cell if it differs.
        sample_size: Forwarded to ``prepare_enhanced_dataset``.

    Returns:
        The trained detector. When ``data_path`` does not exist, a small repeated dummy
        dataset is used instead so the pipeline can still be demonstrated.
    """
    print("🚀 Starting main training pipeline...")

    # Check if dataset exists
    if not os.path.exists(data_path):
        print(f"❌ Dataset not found at '{data_path}'. Please download the Jigsaw dataset and place train.csv in 'jigsaw_data/' folder")
        print("You can download it from: https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/data")

        # Create dummy dataset for demo
        print("Creating dummy dataset for demonstration...")
        dummy_data = {
            'comment_text': [
                "You're so stupid!",
                "I hate you!",
                "This is great work",
                "Thank you for helping",
                "You're worthless",
                "Have a nice day",
                "Go kill yourself",
                "I love this project",
                "You're an idiot",
                "Well done!"
            ] * 200,  # Repeat to get more data
            'label': [1, 1, 0, 0, 1, 0, 1, 0, 1, 0] * 200
        }
        df = pd.DataFrame(dummy_data)
        # Apply the same cleaning that prepare_enhanced_dataset and predict_with_rl use,
        # so training text and prediction-time text are normalised identically.
        df['comment_text'] = df['comment_text'].apply(clean_text)
    else:
        df = prepare_enhanced_dataset(df_path=data_path, sample_size=sample_size)

    # Split data (stratified 80/20, fixed seed)
    X_train, X_val, y_train, y_val = train_test_split(
        df['comment_text'],
        df['label'],
        test_size=0.2,
        random_state=42,
        stratify=df['label']
    )

    print(f"Training set size: {len(X_train)}")
    print(f"Validation set size: {len(X_val)}")

    # Initialize and train model
    detector = EnhancedToxicityDetector()
    detector.train(X_train, y_train, X_val, y_val)

    # Save model
    detector.save_model()

    return detector

# ============================================
# BATCH PROCESSING & MONITORING
# ============================================
def batch_process_with_rl(detector, texts, batch_size=100, delay=0.1):
    """Run ``detector.predict_with_rl`` over ``texts`` in batches.

    Args:
        detector: Object exposing ``predict_with_rl(text)``.
        texts: Any iterable of texts (list, Series, generator, ...).
        batch_size: Number of texts per batch; must be at least 1.
        delay: Seconds to pause between consecutive batches (0 disables the pause).

    Returns:
        List with one prediction result per input text, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Materialise once so generators and other non-sliceable iterables work too
    texts = list(texts)
    results = []
    total_batches = (len(texts) + batch_size - 1) // batch_size

    for batch_number, start in enumerate(range(0, len(texts), batch_size), start=1):
        print(f"Processing batch {batch_number}/{total_batches}...")

        batch = texts[start:start + batch_size]
        results.extend(detector.predict_with_rl(text) for text in batch)

        # Small delay to prevent overwhelming; pointless after the final batch
        if delay and batch_number < total_batches:
            time.sleep(delay)

    return results

# ============================================
# ADVANCED FEATURES
# ============================================
def create_toxicity_report(detector, text):
    """Generate a detailed toxicity analysis report for one text.

    Args:
        detector: Object exposing ``predict_with_rl(text)`` that returns a dict with
            ``final_prediction``, ``confidence``, ``probability``, ``toxicity_type``
            and ``features``.
        text: The text to analyse.

    Returns:
        Dict with the keys ``text``, ``analysis``, ``features``, ``risk_factors`` and
        ``recommendations``.
    """
    result = detector.predict_with_rl(text)

    # The feature dict is empty for text that cleans down to nothing (e.g. "" or a
    # bare URL), so every lookup below uses .get(..., 0) instead of direct indexing.
    features = result.get('features') or {}

    report = {
        "text": text,
        "analysis": {
            "overall_toxicity": result['final_prediction'],
            "confidence": result['confidence'],
            "probability": result['probability'],
            "toxicity_type": result['toxicity_type']
        },
        "features": features,
        "risk_factors": [],
        "recommendations": []
    }

    # Analyze risk factors
    if features.get('caps_ratio', 0) > 0.3:
        report["risk_factors"].append("High proportion of capital letters")

    if features.get('exclamation_count', 0) > 2:
        report["risk_factors"].append("Multiple exclamation marks")

    if features.get('profanity_indicators', 0) > 0:
        report["risk_factors"].append("Contains potentially offensive language")

    # Generate recommendations
    if result['final_prediction']:
        report["recommendations"] = [
            "Consider reviewing this content before publication",
            "May require moderation or content warning",
            "Could benefit from rephrasing for more constructive communication"
        ]
    else:
        report["recommendations"] = [
            "Content appears appropriate for publication",
            "Maintains respectful communication standards"
        ]

    return report

def export_model_insights(detector, output_path="model_insights.json"):
    """Write a JSON summary of the detector's RL statistics and thresholds.

    Args:
        detector: Object with ``rl_agent`` (exposing ``epsilon``, ``memory`` and
            ``reward_history``) and ``threshold_dict``.
        output_path: Destination JSON file.
    """
    # Plain floats keep the payload JSON-serialisable even if NumPy scalars slipped in
    rewards = [float(r) for r in detector.rl_agent.reward_history]
    # Mean over the most recent (up to 100) rewards. Previously this reported 0 until
    # at least 100 rewards existed, contradicting average_reward.
    recent_rewards = rewards[-100:]

    insights = {
        "model_info": {
            "type": "Enhanced Toxicity Detector with Reinforcement Learning",
            "timestamp": datetime.now().isoformat(),
            "features": [
                "Ensemble classifier (XGBoost + Logistic Regression)",
                "Advanced text preprocessing",
                "Reinforcement learning for adaptive predictions",
                "Multi-category toxicity detection",
                "Dynamic threshold optimization"
            ]
        },
        "rl_stats": {
            "epsilon": float(detector.rl_agent.epsilon),
            "memory_size": len(detector.rl_agent.memory),
            "total_experiences": len(rewards),
            "average_reward": float(np.mean(rewards)) if rewards else 0,
            "recent_performance": float(np.mean(recent_rewards)) if recent_rewards else 0
        },
        "thresholds": {name: float(value) for name, value in detector.threshold_dict.items()},
        "toxicity_categories": TOXICITY_CATEGORIES
    }

    with open(output_path, 'w') as f:
        json.dump(insights, f, indent=2)

    print(f"✅ Model insights exported to {output_path}")

# ============================================
# TESTING & VALIDATION
# ============================================
def run_comprehensive_tests(detector):
    """Score the detector on a fixed set of hand-labelled sentences.

    Prints one line per non-empty sample plus an accuracy summary, and returns a list
    of dicts with the keys ``text``, ``expected``, ``predicted``, ``probability`` and
    ``correct``. The empty-string sample is skipped.
    """
    print("🧪 Running comprehensive tests...")

    # (text, expected label) pairs
    labelled_samples = [
        # Clearly toxic
        ("You're so stupid, I hate you!", 1),
        ("Kill yourself, nobody likes you", 1),
        ("You're worthless trash", 1),
        # Borderline cases
        ("This is really annoying", 0),
        ("That's pretty dumb", 0),
        ("I disagree with your opinion", 0),
        # Clearly clean
        ("Great job on the project!", 0),
        ("Thank you for your help", 0),
        ("Have a wonderful day", 0),
        # Edge cases
        ("", 0),
        ("!!!!!!", 0),
        ("HELLO WORLD", 0),
    ]

    # Empty text is never sent to the detector
    scored_samples = [(sample, label) for sample, label in labelled_samples if sample]

    results = []
    for sample, expected in scored_samples:
        outcome = detector.predict_with_rl(sample)
        prediction = outcome['final_prediction']
        is_correct = prediction == expected

        results.append({
            "text": sample,
            "expected": expected,
            "predicted": prediction,
            "probability": outcome['probability'],
            "correct": is_correct
        })

        status = "✅" if is_correct else "❌"
        print(f"{status} '{sample}' | Expected: {expected}, Got: {prediction} ({outcome['probability']:.3f})")

    correct_predictions = sum(1 for entry in results if entry["correct"])
    n_scored = len(scored_samples)
    accuracy = correct_predictions / n_scored
    print(f"\n🎯 Test Accuracy: {accuracy:.2%} ({correct_predictions}/{n_scored})")

    return results

# ============================================
# USAGE EXAMPLES & DEMO
# ============================================
def demo_interactive_session(detector):
    """Run a console loop that classifies typed text and collects feedback.

    Commands handled before classification: 'quit' (exit), 'help' (list
    commands), 'stats' (print RL-agent statistics) and 'report <text>'
    (print the JSON report from create_toxicity_report). Any other
    non-empty input is passed to ``detector.predict_with_rl`` and the user
    is asked whether the prediction was right; 'y'/'yes' and 'n'/'no' are
    forwarded to ``detector.learn_from_feedback``, anything else is ignored.

    Args:
        detector: object exposing ``predict_with_rl``, ``learn_from_feedback``
            and an ``rl_agent`` with ``epsilon``, ``memory`` and
            ``reward_history`` attributes.

    Returns:
        None. The loop ends on 'quit', Ctrl-C, or end-of-input.
    """
    print("\n🎮 Interactive Toxicity Detection Demo")
    print("Type 'quit' to exit, 'help' for commands")
    print("-" * 50)

    while True:
        try:
            user_input = input("\nEnter text to analyze: ").strip()
            command = user_input.lower()

            if command == 'quit':
                break
            elif command == 'help':
                print("""
Commands:
- Type any text to analyze toxicity
- 'stats' - Show model statistics
- 'report <text>' - Generate detailed report
- 'quit' - Exit demo
                """)
                continue
            elif command == 'stats':
                agent = detector.rl_agent
                stats = {
                    'RL Epsilon': agent.epsilon,
                    'Memory Size': len(agent.memory),
                    'Total Feedback': len(agent.reward_history),
                    # Mean of the 50 most recent rewards; 0 when none exist yet.
                    'Avg Reward': np.mean(agent.reward_history[-50:]) if agent.reward_history else 0
                }
                for key, value in stats.items():
                    print(f"{key}: {value}")
                continue
            elif command.startswith('report '):
                text = user_input[len('report '):]  # Remove 'report ' prefix
                report = create_toxicity_report(detector, text)
                print(json.dumps(report, indent=2))
                continue
            elif not user_input:
                print("Please enter some text to analyze.")
                continue

            # Analyze the text
            result = detector.predict_with_rl(user_input)

            # Display results
            status = "🚨 TOXIC" if result['final_prediction'] else "✅ CLEAN"
            print(f"\nResult: {status}")
            print(f"Confidence: {result['confidence']:.3f}")
            print(f"Probability: {result['probability']:.3f}")
            print(f"Type: {result['toxicity_type']}")

            # Ask for feedback
            feedback = input("\nWas this prediction correct? (y/n/skip): ").strip().lower()
            if feedback in ['y', 'yes']:
                detector.learn_from_feedback(user_input, result, result['final_prediction'], 'correct')
                print("✅ Thank you! The model learned from your feedback.")
            elif feedback in ['n', 'no']:
                # The prediction was wrong, so the true label is the opposite class.
                detector.learn_from_feedback(user_input, result, 1 - result['final_prediction'], 'incorrect')
                print("✅ Thank you! The model learned from your correction.")

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the session too: once stdin is exhausted every
            # further input() call raises again, and the generic handler below
            # would keep the loop spinning forever.
            print("\n\nDemo interrupted. Goodbye!")
            break
        except Exception as e:
            # Keep the session alive on per-request failures (e.g. model errors).
            print(f"Error: {e}")

# ============================================
# FLASK WEB APPLICATION
# ============================================

# Module-level Flask application and the detector shared by every route.
# `detector` starts as None; the route handlers below answer with a
# "Model not loaded" error until the __main__ block assigns a detector to it.
app = Flask(__name__)
detector = None

# Dashboard HTML template (rendered with Jinja via render_template_string; the
# only server-side placeholders are the four `stats.*` values).
# The client script escapes every server-provided value before writing it to
# innerHTML and keeps the analyzed text in a JS variable instead of splicing it
# into an inline onclick attribute, so apostrophes or markup in user text can
# neither break the feedback buttons nor inject script.
DASHBOARD_TEMPLATE = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Toxicity Detection Dashboard</title>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
            padding: 20px;
        }

        .container {
            max-width: 1200px;
            margin: 0 auto;
            background: white;
            border-radius: 15px;
            box-shadow: 0 20px 40px rgba(0,0,0,0.1);
            overflow: hidden;
        }

        .header {
            background: linear-gradient(45deg, #667eea, #764ba2);
            color: white;
            padding: 30px;
            text-align: center;
        }

        .header h1 {
            font-size: 2.5rem;
            margin-bottom: 10px;
        }

        .header p {
            font-size: 1.1rem;
            opacity: 0.9;
        }

        .content {
            padding: 40px;
        }

        .stats-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px;
            margin-bottom: 40px;
        }

        .stat-card {
            background: #f8f9ff;
            border: 1px solid #e2e8f0;
            border-radius: 10px;
            padding: 25px;
            text-align: center;
            transition: transform 0.3s ease;
        }

        .stat-card:hover {
            transform: translateY(-5px);
            box-shadow: 0 10px 25px rgba(0,0,0,0.1);
        }

        .stat-number {
            font-size: 2rem;
            font-weight: bold;
            color: #667eea;
            margin-bottom: 10px;
        }

        .stat-label {
            color: #64748b;
            font-weight: 500;
        }

        .test-section {
            background: #f8f9ff;
            border-radius: 10px;
            padding: 30px;
            margin-bottom: 30px;
        }

        .test-section h2 {
            color: #1e293b;
            margin-bottom: 20px;
            font-size: 1.5rem;
        }

        .input-group {
            margin-bottom: 20px;
        }

        .input-group label {
            display: block;
            margin-bottom: 8px;
            color: #374151;
            font-weight: 500;
        }

        .input-group textarea {
            width: 100%;
            padding: 15px;
            border: 2px solid #e2e8f0;
            border-radius: 8px;
            font-size: 1rem;
            resize: vertical;
            min-height: 120px;
            transition: border-color 0.3s ease;
        }

        .input-group textarea:focus {
            outline: none;
            border-color: #667eea;
            box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
        }

        .btn {
            background: linear-gradient(45deg, #667eea, #764ba2);
            color: white;
            border: none;
            padding: 15px 30px;
            border-radius: 8px;
            cursor: pointer;
            font-size: 1rem;
            font-weight: 500;
            transition: all 0.3s ease;
        }

        .btn:hover {
            transform: translateY(-2px);
            box-shadow: 0 5px 15px rgba(102, 126, 234, 0.3);
        }

        .result {
            margin-top: 20px;
            padding: 20px;
            border-radius: 8px;
            border-left: 4px solid;
        }

        .result.toxic {
            background: #fef2f2;
            border-color: #ef4444;
            color: #991b1b;
        }

        .result.clean {
            background: #f0fdf4;
            border-color: #22c55e;
            color: #166534;
        }

        .feature-list {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 15px;
            margin-top: 15px;
        }

        .feature-item {
            background: white;
            padding: 10px;
            border-radius: 5px;
            border: 1px solid #e2e8f0;
        }

        .feedback-section {
            background: #fef3c7;
            border-radius: 8px;
            padding: 20px;
            margin-top: 20px;
        }

        .feedback-buttons {
            display: flex;
            gap: 10px;
            margin-top: 15px;
        }

        .btn-small {
            padding: 8px 16px;
            font-size: 0.9rem;
        }

        .btn-success {
            background: #22c55e;
        }

        .btn-danger {
            background: #ef4444;
        }

        .api-section {
            background: #1e293b;
            color: white;
            border-radius: 10px;
            padding: 30px;
            margin-top: 30px;
        }

        .api-section h2 {
            margin-bottom: 20px;
        }

        .api-endpoint {
            background: #334155;
            border-radius: 5px;
            padding: 15px;
            margin-bottom: 15px;
            font-family: 'Courier New', monospace;
        }

        .method {
            color: #22c55e;
            font-weight: bold;
        }

        .loading {
            display: none;
            text-align: center;
            padding: 20px;
        }

        .spinner {
            border: 4px solid #f3f4f6;
            border-top: 4px solid #667eea;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
            margin: 0 auto 10px;
        }

        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🛡️ Toxicity Detection Dashboard</h1>
            <p>Enhanced ML Model with Reinforcement Learning</p>
        </div>

        <div class="content">
            <!-- Statistics Section -->
            <div class="stats-grid">
                <div class="stat-card">
                    <div class="stat-number" id="epsilon">{{ stats.epsilon }}</div>
                    <div class="stat-label">RL Epsilon</div>
                </div>
                <div class="stat-card">
                    <div class="stat-number" id="memory-size">{{ stats.memory_size }}</div>
                    <div class="stat-label">Memory Size</div>
                </div>
                <div class="stat-card">
                    <div class="stat-number" id="total-feedback">{{ stats.total_feedback }}</div>
                    <div class="stat-label">Total Feedback</div>
                </div>
                <div class="stat-card">
                    <div class="stat-number" id="avg-reward">{{ "%.3f"|format(stats.avg_reward) }}</div>
                    <div class="stat-label">Avg Reward</div>
                </div>
            </div>

            <!-- Test Section -->
            <div class="test-section">
                <h2>🧪 Test the Model</h2>
                <div class="input-group">
                    <label for="test-text">Enter text to analyze:</label>
                    <textarea id="test-text" placeholder="Type your text here..."></textarea>
                </div>
                <button class="btn" onclick="analyzeText()">Analyze Text</button>

                <div class="loading" id="loading">
                    <div class="spinner"></div>
                    <p>Analyzing text...</p>
                </div>

                <div id="result-container"></div>
            </div>

            <!-- API Documentation -->
            <div class="api-section">
                <h2>📡 API Endpoints</h2>
                <div class="api-endpoint">
                    <span class="method">POST</span> /api/predict
                    <br><small>Analyze text for toxicity</small>
                </div>
                <div class="api-endpoint">
                    <span class="method">POST</span> /api/feedback
                    <br><small>Submit feedback for model improvement</small>
                </div>
                <div class="api-endpoint">
                    <span class="method">GET</span> /api/stats
                    <br><small>Get model statistics</small>
                </div>
            </div>
        </div>
    </div>

    <script>
        // Text of the most recent successful analysis. The feedback buttons read
        // it from here so user input is never embedded in an HTML attribute.
        let lastAnalyzedText = '';

        // Escape a value for safe insertion into innerHTML.
        function escapeHtml(value) {
            const replacements = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
            return String(value).replace(/[&<>"']/g, (ch) => replacements[ch]);
        }

        async function analyzeText() {
            const text = document.getElementById('test-text').value;
            if (!text.trim()) {
                alert('Please enter some text to analyze');
                return;
            }

            const loading = document.getElementById('loading');
            const resultContainer = document.getElementById('result-container');

            loading.style.display = 'block';
            resultContainer.innerHTML = '';

            try {
                const response = await fetch('/api/predict', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({ text: text })
                });

                const result = await response.json();
                loading.style.display = 'none';

                if (response.ok) {
                    lastAnalyzedText = text;
                    displayResult(result);
                } else {
                    resultContainer.innerHTML = `<div class="result toxic">Error: ${escapeHtml(result.error)}</div>`;
                }
            } catch (error) {
                loading.style.display = 'none';
                resultContainer.innerHTML = `<div class="result toxic">Network error: ${escapeHtml(error.message)}</div>`;
            }
        }

        function displayResult(result) {
            const container = document.getElementById('result-container');
            const isToxic = result.final_prediction === 1;
            const status = isToxic ? 'TOXIC 🚨' : 'CLEAN ✅';
            const resultClass = isToxic ? 'toxic' : 'clean';

            // Tolerate responses without a features object.
            const featuresHtml = Object.entries(result.features || {}).map(([key, value]) =>
                `<div class="feature-item"><strong>${escapeHtml(key)}:</strong> ${escapeHtml(value)}</div>`
            ).join('');

            container.innerHTML = `
                <div class="result ${resultClass}">
                    <h3>Result: ${status}</h3>
                    <p><strong>Confidence:</strong> ${(result.confidence * 100).toFixed(1)}%</p>
                    <p><strong>Probability:</strong> ${(result.probability * 100).toFixed(1)}%</p>
                    <p><strong>Toxicity Type:</strong> ${escapeHtml(result.toxicity_type)}</p>

                    <div class="feedback-section">
                        <p><strong>Was this prediction correct?</strong></p>
                        <div class="feedback-buttons">
                            <button class="btn btn-small btn-success" onclick="submitFeedback(lastAnalyzedText, true)">
                                ✅ Correct
                            </button>
                            <button class="btn btn-small btn-danger" onclick="submitFeedback(lastAnalyzedText, false)">
                                ❌ Incorrect
                            </button>
                        </div>
                    </div>

                    <h4>Text Features:</h4>
                    <div class="feature-list">
                        ${featuresHtml}
                    </div>
                </div>
            `;
        }

        async function submitFeedback(text, isCorrect) {
            try {
                const response = await fetch('/api/feedback', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({
                        text: text,
                        feedback: isCorrect ? 'correct' : 'incorrect'
                    })
                });

                const result = await response.json();

                if (response.ok) {
                    alert('Thank you for your feedback! The model will learn from this.');
                    // Refresh stats
                    location.reload();
                } else {
                    alert('Error submitting feedback: ' + result.error);
                }
            } catch (error) {
                alert('Network error: ' + error.message);
            }
        }

        // Auto-refresh stats every 30 seconds
        setInterval(() => {
            fetch('/api/stats')
                .then(response => {
                    // Error payloads carry no numeric fields; skip the update.
                    if (!response.ok) {
                        throw new Error('Stats request failed: ' + response.status);
                    }
                    return response.json();
                })
                .then(data => {
                    document.getElementById('epsilon').textContent = data.epsilon.toFixed(3);
                    document.getElementById('memory-size').textContent = data.memory_size;
                    document.getElementById('total-feedback').textContent = data.total_feedback;
                    document.getElementById('avg-reward').textContent = data.avg_reward.toFixed(3);
                })
                .catch(console.error);
        }, 30000);

        // Ctrl+Enter in the textarea triggers the analysis
        document.getElementById('test-text').addEventListener('keypress', function(e) {
            if (e.key === 'Enter' && e.ctrlKey) {
                analyzeText();
            }
        });
    </script>
</body>
</html>
'''

# Flask routes
@app.route('/')
def index():
    """Serve the static landing page.

    The page lists the available endpoints, links to the dashboard, and
    contains a plain HTML form that posts its `text` field to /api/predict.
    """
    landing_page = '''
    <h1>🛡️ Enhanced Toxicity Detection API</h1>
    <p>Welcome to the Enhanced Toxicity Detection System with Reinforcement Learning!</p>
    <h2>Available Endpoints:</h2>
    <ul>
        <li><a href="/dashboard">📊 Dashboard</a> - Interactive monitoring dashboard</li>
        <li><strong>POST /api/predict</strong> - Analyze text for toxicity</li>
        <li><strong>POST /api/feedback</strong> - Submit feedback for model improvement</li>
        <li><strong>GET /api/stats</strong> - Get model statistics</li>
    </ul>
    <h2>Quick Test:</h2>
    <form action="/api/predict" method="post" style="margin: 20px 0;">
        <textarea name="text" placeholder="Enter text to analyze..." style="width: 400px; height: 100px;"></textarea><br><br>
        <input type="submit" value="Analyze Text" style="padding: 10px 20px; background: #667eea; color: white; border: none; border-radius: 5px;">
    </form>
    '''
    return landing_page

@app.route('/dashboard')
def dashboard():
    """Render DASHBOARD_TEMPLATE with a snapshot of the RL agent's statistics.

    Returns a JSON error with status 500 while no detector has been assigned.
    """
    if detector is None:
        return jsonify({'error': 'Model not loaded'}), 500

    agent = detector.rl_agent
    epsilon = round(agent.epsilon, 3)
    memory_size = len(agent.memory)
    rewards = agent.reward_history
    total_feedback = len(rewards)

    # Mean of the 50 most recent rewards; 0 when nothing has been recorded.
    if rewards:
        avg_reward = np.mean(rewards[-50:])
    else:
        avg_reward = 0

    snapshot = {
        'epsilon': epsilon,
        'memory_size': memory_size,
        'total_feedback': total_feedback,
        'avg_reward': avg_reward,
    }
    return render_template_string(DASHBOARD_TEMPLATE, stats=snapshot)

@app.route('/api/predict', methods=['POST'])
def predict():
    """Predict toxicity for a single text.

    Accepts either a JSON object ``{"text": "..."}`` or form data with a
    ``text`` field (the landing-page form posts the latter).

    Returns:
        200 with the dict produced by ``detector.predict_with_rl``.
        400 when the body is not a JSON object or ``text`` is missing,
            not a string, or only whitespace.
        500 when no detector is loaded or prediction itself fails.
    """
    if detector is None:
        return jsonify({'error': 'Model not loaded'}), 500

    if request.is_json:
        # silent=True: malformed JSON yields None instead of raising, so a
        # client mistake is answered with 400 rather than a generic 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400
        text = data.get('text', '')
    else:
        text = request.form.get('text', '')

    if not isinstance(text, str) or not text.strip():
        return jsonify({'error': 'No text provided'}), 400

    try:
        return jsonify(detector.predict_with_rl(text))
    except Exception as e:
        # Only genuine server-side failures are reported as 500.
        return jsonify({'error': str(e)}), 500

@app.route('/api/feedback', methods=['POST'])
def feedback():
    """Record user feedback on a prediction and let the detector learn from it.

    Expects a JSON object ``{"text": "...", "feedback": "correct"|"incorrect"}``.
    The text is re-classified with ``detector.predict_with_rl``; "correct"
    keeps that prediction as the true label, "incorrect" flips it.

    Returns:
        200 with the processed text, feedback and derived label.
        400 when the body is not a JSON object, a field is missing, or the
            feedback value is not "correct"/"incorrect".
        500 when no detector is loaded or prediction/learning fails.
    """
    if detector is None:
        return jsonify({'error': 'Model not loaded'}), 500

    # silent=True: a non-JSON or malformed body yields None instead of raising,
    # so it is reported as a client error below rather than as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    text = data.get('text', '')
    user_feedback = data.get('feedback', '')

    if not text or not user_feedback:
        return jsonify({'error': 'Missing text or feedback'}), 400
    if not isinstance(text, str):
        return jsonify({'error': 'Text must be a string'}), 400
    # Validate before predicting so invalid requests never touch the model.
    if user_feedback not in ('correct', 'incorrect'):
        return jsonify({'error': 'Invalid feedback. Use "correct" or "incorrect"'}), 400

    try:
        # NOTE(review): the label is derived from this fresh prediction, not the
        # one the user saw — confirm predict_with_rl is deterministic per text.
        result = detector.predict_with_rl(text)

        predicted = result['final_prediction']
        # int() keeps the value a plain JSON-serializable integer.
        actual_label = int(predicted if user_feedback == 'correct' else 1 - predicted)

        detector.learn_from_feedback(text, result, actual_label, user_feedback)

        return jsonify({
            'message': 'Feedback received and processed',
            'text': text,
            'feedback': user_feedback,
            'actual_label': actual_label
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/api/stats', methods=['GET'])
def stats():
    """Return the RL agent's statistics and the detector thresholds as JSON.

    Responds with a JSON error and status 500 when no detector is loaded or
    when collecting the statistics raises.
    """
    if detector is None:
        return jsonify({'error': 'Model not loaded'}), 500

    try:
        agent = detector.rl_agent
        payload = {'epsilon': agent.epsilon}
        payload['memory_size'] = len(agent.memory)

        rewards = agent.reward_history
        payload['total_feedback'] = len(rewards)
        # Mean of the 50 most recent rewards; 0 when nothing has been recorded.
        if rewards:
            payload['avg_reward'] = np.mean(rewards[-50:])
        else:
            payload['avg_reward'] = 0

        payload['threshold_dict'] = detector.threshold_dict
        payload['model_type'] = 'Enhanced Toxicity Detector with RL'
        return jsonify(payload)

    except Exception as exc:
        return jsonify({'error': str(exc)}), 500

@app.route('/api/batch', methods=['POST'])
def batch_predict():
    """Predict toxicity for up to 100 texts in one request.

    Expects a JSON object ``{"texts": ["...", ...]}``. Blank or
    whitespace-only entries are skipped, so ``results`` can be shorter than
    the submitted list; ``total_processed`` reports how many were classified.

    Returns:
        200 with ``total_processed`` and the list of prediction dicts.
        400 when the body is not a JSON object, ``texts`` is not a non-empty
            list of strings, or it has more than 100 entries.
        500 when no detector is loaded or a prediction fails.
    """
    if detector is None:
        return jsonify({'error': 'Model not loaded'}), 500

    # silent=True: a non-JSON or malformed body yields None instead of raising,
    # so it is reported as a client error below rather than as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    texts = data.get('texts', [])

    if not texts or not isinstance(texts, list):
        return jsonify({'error': 'Please provide a list of texts'}), 400

    if len(texts) > 100:
        return jsonify({'error': 'Maximum 100 texts per batch'}), 400

    # Reject non-string entries up front; calling .strip() on them would raise
    # and surface as a misleading 500.
    if not all(isinstance(text, str) for text in texts):
        return jsonify({'error': 'Every item in texts must be a string'}), 400

    try:
        results = [detector.predict_with_rl(text) for text in texts if text.strip()]

        return jsonify({
            'total_processed': len(results),
            'results': results
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

# ============================================
# MAIN EXECUTION
# ============================================
if __name__ == "__main__":
    # Entry point: prompt for one of four modes (train, load + test,
    # interactive demo, Flask server) and run it. Each mode assigns the
    # module-level `detector` that the Flask routes read.
    print("🛡️ Enhanced Toxicity Detection System")
    print("=" * 50)

    # Check if we should run training or use existing model
    choice = input("Choose option:\n1. Train new model\n2. Load existing model\n3. Interactive demo\n4. Start Flask server\nEnter choice (1-4): ").strip()

    if choice == "1":
        print("\n🚀 Starting training pipeline...")
        detector = main_training_pipeline()

        # Run tests
        run_comprehensive_tests(detector)

        # Export insights
        export_model_insights(detector)

        print("\n✅ Training completed successfully!")
        print(f"Model saved to: {MODEL_PATH}")
        print(f"RL Agent saved to: {RL_MODEL_PATH}")

    elif choice == "2":
        detector = EnhancedToxicityDetector()
        if detector.load_model():
            print("✅ Model loaded successfully!")

            # Run some tests
            run_comprehensive_tests(detector)

            # Option for interactive demo
            if input("\nStart interactive demo? (y/n): ").lower() == 'y':
                demo_interactive_session(detector)
        else:
            print("❌ No existing model found. Please train a new model first.")

    elif choice == "3":
        detector = EnhancedToxicityDetector()
        if detector.load_model():
            demo_interactive_session(detector)
        else:
            print("❌ No existing model found. Please train a new model first.")

    elif choice == "4":
        if FLASK_AVAILABLE:
            import os

            detector = EnhancedToxicityDetector()
            if not detector.load_model():
                print("❌ No existing model found. Creating a dummy model for demonstration...")
                # Create a simple demo model
                from sklearn.feature_extraction.text import TfidfVectorizer
                from sklearn.linear_model import LogisticRegression
                from sklearn.pipeline import Pipeline

                # Create dummy training data
                dummy_texts = [
                    "You're so stupid!", "I hate you!", "This is great work", "Thank you for helping",
                    "You're worthless", "Have a nice day", "Go kill yourself", "I love this project"
                ]
                dummy_labels = [1, 1, 0, 0, 1, 0, 1, 0]

                # Simple pipeline
                detector.pipeline = Pipeline([
                    ('tfidf', TfidfVectorizer(max_features=1000)),
                    ('clf', LogisticRegression())
                ])

                detector.pipeline.fit(dummy_texts, dummy_labels)
                detector.threshold_dict['binary'] = 0.5
                print("✅ Demo model created!")

            # Set up ngrok tunnel
            port = 5000

            try:
                # The authtoken is a credential: read it from the environment
                # (get your own at ngrok.com) instead of committing it to source.
                ngrok_token = os.environ.get("NGROK_AUTH_TOKEN")
                if not ngrok_token:
                    raise RuntimeError("NGROK_AUTH_TOKEN environment variable is not set")
                ngrok.set_auth_token(ngrok_token)
                public_url = ngrok.connect(port).public_url

                print("🌐 Starting Flask server...")
                print("Access the web interface at:", public_url)
                print("Local access:", f"http://127.0.0.1:{port}")
            except Exception as e:
                # Best effort: without a tunnel the server is still usable locally.
                print(f"⚠️ Ngrok setup failed: {e}")
                print("🌐 Starting Flask server locally...")
                print("Local access:", f"http://127.0.0.1:{port}")

            print("API endpoints:")
            print("  - GET / - Home page")
            print("  - GET /dashboard - Monitoring dashboard")
            print("  - POST /api/predict - Analyze text")
            print("  - POST /api/feedback - Submit feedback")
            print("  - GET /api/stats - Model statistics")
            print("  - POST /api/batch - Batch analysis")

            # debug stays off: the server listens on all interfaces and may be
            # reachable through a public tunnel, and the interactive debugger
            # allows arbitrary code execution.
            app.run(host='0.0.0.0', port=port, debug=False, use_reloader=False)
        else:
            print("❌ Flask not available. Please install Flask: pip install flask pyngrok")

    else:
        print("Invalid choice. Please run the script again.")

🛡️ Enhanced Toxicity Detection System
Choose option:
1. Train new model
2. Load existing model
3. Interactive demo
4. Start Flask server
Enter choice (1-4): 3
✅ Model loaded from enhanced_toxicity_model.pkl

🎮 Interactive Toxicity Detection Demo
Type 'quit' to exit, 'help' for commands
--------------------------------------------------

Enter text to analyze: What is this Ram?

Result: 🚨 TOXIC
Confidence: 0.103
Probability: 0.448
Type: insult

Was this prediction correct? (y/n/skip): y
✅ RL agent learned from feedback. Reward: 1.50
✅ Thank you! The model learned from your feedback.

Enter text to analyze: quit
