In [8]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import re

ModuleNotFoundError: No module named 'pandas'

In [None]:
class TextModerator:
    """Classify text as safe or unsafe.

    Two checks are applied in order by ``check_text``: a substring match
    against a list of offensive words, then a TF-IDF + logistic-regression
    classifier fitted on labelled safe/unsafe example lines.

    ``__init__`` only loads the data files; it does not fit the model.
    Call ``train_model()`` before relying on the ML branch of
    ``check_text``.
    """

    def __init__(self, data_dir='../data'):
        """Create the moderator and load its data files.

        Args:
            data_dir: Directory containing ``offensive_words.txt`` and a
                ``training_data`` sub-directory with ``safe_examples.txt``
                and ``unsafe_examples.txt``. Defaults to ``'../data'``.
        """
        self.data_dir = data_dir
        self.offensive_words = set()
        self.vectorizer = TfidfVectorizer()
        self.classifier = LogisticRegression()
        self.load_data()

    @staticmethod
    def _read_lines(path):
        """Return the stripped, non-blank lines of a UTF-8 text file."""
        with open(path, 'r', encoding='utf-8') as f:
            stripped = (line.strip() for line in f)
            # Blank lines carry no information: as an offensive "word" the
            # empty string matches every text, and as a training example
            # it is just a labelled empty document.
            return [line for line in stripped if line]

    def load_data(self):
        """Load the offensive-word list and build the training DataFrame.

        Sets ``self.offensive_words`` (lower-cased set of words) and
        ``self.training_data`` (columns ``text`` and ``is_unsafe``, where
        0 marks a safe example and 1 an unsafe one).

        Raises:
            OSError: If any of the three data files cannot be opened.
        """
        from pathlib import Path

        base = Path(self.data_dir)
        training_dir = base / 'training_data'

        # Load offensive words
        self.offensive_words = {
            word.lower()
            for word in self._read_lines(base / 'offensive_words.txt')
        }

        # Load training examples
        safe_examples = self._read_lines(training_dir / 'safe_examples.txt')
        unsafe_examples = self._read_lines(training_dir / 'unsafe_examples.txt')

        # Create training dataset
        self.training_data = pd.DataFrame({
            'text': safe_examples + unsafe_examples,
            'is_unsafe': [0] * len(safe_examples) + [1] * len(unsafe_examples),
        })

    def train_model(self):
        """Fit the vectorizer and the classifier on ``self.training_data``."""
        X = self.vectorizer.fit_transform(self.training_data['text'])
        y = self.training_data['is_unsafe']
        self.classifier.fit(X, y)

    def check_text(self, text):
        """Check if text is safe or unsafe.

        Args:
            text: The string to moderate.

        Returns:
            A dict with keys ``is_safe`` (bool), ``reason`` (str) and
            ``confidence`` (float). A word-list hit returns confidence 1.0;
            otherwise ``confidence`` is the highest class probability
            reported by the classifier.
        """
        # Direct word matching. This is a case-insensitive *substring*
        # test, so a listed word also matches inside longer words.
        text_lower = text.lower()
        # Skip empty entries: '' is a substring of every string and would
        # otherwise flag all input as offensive.
        if any(word and word in text_lower for word in self.offensive_words):
            return {
                'is_safe': False,
                'reason': 'Contains offensive word',
                'confidence': 1.0,
            }

        # ML-based check (requires train_model() to have been called)
        text_vector = self.vectorizer.transform([text])
        prediction = self.classifier.predict(text_vector)[0]
        probability = self.classifier.predict_proba(text_vector)[0]

        # Convert NumPy scalars to plain Python types so the result can be
        # serialized (e.g. with json) without special handling.
        return {
            'is_safe': bool(prediction == 0),
            'reason': 'ML model prediction',
            'confidence': float(max(probability)),
        }