In [1]:
# !pip install pandas transformers scikit-learn matplotlib seaborn sentencepiece accelerate -q
# !pip install protobuf

# import pandas as pd
# hate_yes_data = pd.read_csv('/root/ccs_aisf/data/yes_no/hate_vs_antagonist_yes.csv')
# hate_no_data = pd.read_csv('/root/ccs_aisf/data/yes_no/hate_vs_antagonist_no.csv')

# torch.set_default_tensor_type(torch.cuda.HalfTensor)

# !pip install nltk sentence-transformers
# import nltk

# nltk.download('wordnet')
# nltk.download('punkt')
# nltk.download('stopwords')

In [2]:
import re
import pickle
import pandas as pd
from sklearn.metrics import accuracy_score
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForMaskedLM, AutoModelForSequenceClassification
# from transformers import EncoderDecoderModel, BertTokenizer, DistilBertTokenizer
# from transformers import AutoModelForCausalLM

from sklearn.linear_model import LogisticRegression
import numpy as np
from tqdm import tqdm

import sys
import os

# Add the project's code directory to the Python path so its modules can be imported.
# The location differs per machine, so it can be overridden through the
# CCS_AISF_CODE_DIR environment variable; the original path stays the default.
code_dir = os.environ.get('CCS_AISF_CODE_DIR', '/Users/elenaericheva/ericheva_git/ccs_aisf/code')
if code_dir not in sys.path:
    sys.path.insert(0, code_dir)

print(f"Added {code_dir} to Python path")

  from .autonotebook import tqdm as notebook_tqdm


Added /Users/elenaericheva/ericheva_git/ccs_aisf/code to Python path


# Data Sanity Check

In [3]:

import pandas as pd
from IPython.display import display

def load_and_validate_data():
    """
    Load and validate all datasets
    CHANGED: Added comprehensive validation pipeline
    """
    datasets = {}
    print("STARTING COMPREHENSIVE HATE SPEECH DATASET SANITY CHECK")
    print("="*60)
    # Load hate data
    # MIXED!!!
    hate_data = pd.read_csv('../data/raw/total_hate_data.csv', index_col=0)
    datasets['hate_data'] = hate_data
    # Load yes/no hate data
    hate_total_yes_data = pd.read_csv('../data/yes_no/hate_total_yes_data.csv', index_col=0)
    hate_total_no_data = pd.read_csv('../data/yes_no/hate_total_no_data.csv', index_col=0)
    datasets['hate_total_yes_data'] = hate_total_yes_data
    datasets['hate_total_no_data'] = hate_total_no_data
    
    # NOT!!!    
    # Load hate_not data DUMP!!!
    hate_data_not = pd.read_csv('../data/raw/total_hate3.0.csv', index_col=0)
    datasets['hate_data_not'] = hate_data_not
    
    # Load yes/no hate_not data
    hate_total_yes_data_not = pd.read_csv('../data/yes_no/hate_total3.0_yes.csv', index_col=0)
    hate_total_no_data_not = pd.read_csv('../data/yes_no/hate_total3.0_no.csv', index_col=0)
    datasets['hate_total_yes_data_not'] = hate_total_yes_data_not
    datasets['hate_total_no_data_not'] = hate_total_no_data_not
    

    # TTT!!!
    # Process data with 'not' -> 'ttt' replacement
    hate_total_yes_data_not_ttt = hate_total_yes_data_not.copy()
    hate_total_no_data_not_ttt = hate_total_no_data_not.copy()
    hate_total_yes_data_not_ttt['statement'] = hate_total_yes_data_not['statement'].apply(
        lambda x: x.replace('not', 'ttt').replace('Not', 'ttt'))
    hate_total_no_data_not_ttt['statement'] = hate_total_no_data_not['statement'].apply(
        lambda x: x.replace('not', 'ttt').replace('Not', 'ttt'))
    datasets['hate_total_yes_data_not_ttt'] = hate_total_yes_data_not_ttt
    datasets['hate_total_no_data_not_ttt'] = hate_total_no_data_not_ttt

    return datasets

# Load all datasets once; the following cells read from this dictionary.
datasets = load_and_validate_data()


STARTING COMPREHENSIVE HATE SPEECH DATASET SANITY CHECK


In [4]:
def validate_dataset_structure(datasets):
    """
    Print a structural report for every dataset.

    For each DataFrame the report contains its shape and columns, whether the
    required columns are present, per-column null counts, a preview of the rows
    whose label is null, and the label distribution.

    Checks that need the label column are skipped when that column is missing
    (it has already been reported as missing at that point), so one malformed
    dataset does not abort the report for the remaining ones.

    Args:
        datasets: dict mapping dataset name -> DataFrame.

    Returns:
        None. The report is printed.
    """
    required_cols = ['statement', 'is_harmfull_opposition']
    label_col = 'is_harmfull_opposition'
    max_preview = 100  # characters of a statement shown for a null-label row

    print("=== DATASET STRUCTURE VALIDATION ===")

    for name, df in datasets.items():
        print(f"\nDataset: {name}")
        print(f"Shape: {df.shape}")
        print(f"Columns: {list(df.columns)}")

        # Check for required columns
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            print(f"⚠️  Missing columns: {missing_cols}")
        else:
            print("✅ All required columns present")
        has_labels = label_col in df.columns

        # Check for null values
        null_counts = df.isnull().sum()
        if null_counts.any():
            print(f"⚠️  Null values found: {null_counts[null_counts > 0].to_dict()}")

            # Display rows with null labels
            if has_labels:
                null_label_mask = df[label_col].isnull()
                if null_label_mask.any():
                    print(f"\n📋 Rows with null labels ({null_label_mask.sum()} total):")
                    for idx, row in df[null_label_mask].iterrows():
                        # The statement can itself be null (NaN) or absent, so
                        # coerce it to text before measuring and slicing it.
                        text = str(row.get('statement', ''))
                        suffix = '...' if len(text) > max_preview else ''
                        print(f"  Index {idx}: '{text[:max_preview]}{suffix}'")
        else:
            print("✅ No null values found")

        # Check label distribution
        if has_labels:
            label_dist = df[label_col].value_counts().sort_index()
            print(f"Label distribution: {label_dist.to_dict()}")

# Run the structural checks on every loaded dataset.
validate_dataset_structure(datasets)

=== DATASET STRUCTURE VALIDATION ===

Dataset: hate_data
Shape: (1244, 2)
Columns: ['statement', 'is_harmfull_opposition']
✅ All required columns present
✅ No null values found
Label distribution: {0: 622, 1: 622}

Dataset: hate_total_yes_data
Shape: (1244, 2)
Columns: ['statement', 'is_harmfull_opposition']
✅ All required columns present
✅ No null values found
Label distribution: {0: 622, 1: 622}

Dataset: hate_total_no_data
Shape: (1244, 2)
Columns: ['statement', 'is_harmfull_opposition']
✅ All required columns present
✅ No null values found
Label distribution: {0: 622, 1: 622}

Dataset: hate_data_not
Shape: (1250, 2)
Columns: ['statement', 'is_harmfull_opposition']
✅ All required columns present
✅ No null values found
Label distribution: {0: 625, 1: 625}

Dataset: hate_total_yes_data_not
Shape: (1250, 2)
Columns: ['statement', 'is_harmfull_opposition']
✅ All required columns present
✅ No null values found
Label distribution: {0: 625, 1: 625}

Dataset: hate_total_no_data_not
Shape: (

In [5]:

import re

def extract_core_sentence(statement):
    """
    Reduce a statement to a canonical lowercase form for cross-dataset comparison.

    Applied in order: drop a trailing ``Yes``/``No`` answer together with its
    optional neighbouring punctuation, turn the ``ttt`` placeholder back into
    ``not``, collapse runs of whitespace, strip trailing ``.``/``!`` characters
    and lowercase the result.
    """
    # Trailing answer marker, e.g. ".Yes.", ".No", "Yes"
    without_answer = re.sub(r'[.!?]?(Yes|No)\.?$', '', statement)

    # Undo the placeholder substitution
    restored = without_answer.replace('ttt', 'not').replace('TTT', 'NOT')

    # Trim, then squeeze every whitespace run down to one space
    single_spaced = re.sub(r'\s+', ' ', restored.strip())

    return single_spaced.rstrip('.!').lower()

def check_sentence_order_consistency(datasets, reference_name, max_mismatches_shown=None):
    """
    Check that every dataset lists the same sentences in the same order as a reference.

    Statements are compared through ``extract_core_sentence``, so the expected
    per-dataset modifications (answer suffix, ``ttt`` placeholder, case,
    trailing punctuation) do not count as differences.

    Args:
        datasets: dict mapping dataset name -> DataFrame with a ``statement`` column.
        reference_name: Key of the dataset every other dataset is compared with.
            Must be present in ``datasets`` (``KeyError`` otherwise).
        max_mismatches_shown: Maximum number of mismatching rows printed in
            detail per dataset. ``None`` (the default) prints all of them,
            which is the previous behaviour.

    Returns:
        None. The report is printed.
    """
    print("\n=== SENTENCE ORDER CONSISTENCY CHECK ===")

    # Core form of every statement, per dataset
    core_statements = {
        name: [extract_core_sentence(stmt) for stmt in df['statement']]
        for name, df in datasets.items()
    }

    # The caller decides which dataset acts as the reference
    reference_statements = core_statements[reference_name]

    print(f"Using {reference_name} as reference ({len(reference_statements)} statements)")

    # Show first few reference statements for verification
    print(f"\nFirst 3 reference core statements from {reference_name}:")
    for i, stmt in enumerate(reference_statements[:3]):
        print(f"  {i}: {stmt}")

    for name, statements in core_statements.items():
        if name == reference_name:
            continue

        print(f"\nComparing {name}:")
        print(f"Length: {len(statements)} (vs reference: {len(reference_statements)})")

        # Show first few statements from current dataset for comparison
        print(f"First 3 core statements from {name}:")
        for i, stmt in enumerate(statements[:3]):
            print(f"  {i}: {stmt}")

        # A positional comparison is meaningless when the lengths differ
        if len(statements) != len(reference_statements):
            print("⚠️  Length mismatch!")
            continue

        # Positions whose core sentences differ
        mismatches = [
            i for i, (ref_stmt, curr_stmt) in enumerate(zip(reference_statements, statements))
            if ref_stmt != curr_stmt
        ]

        if not mismatches:
            print("✅ Order matches reference")
            continue

        print(f"⚠️  Order mismatches at indices: {mismatches[:10]}{'...' if len(mismatches) > 10 else ''}")
        print(f"Total mismatches: {len(mismatches)}")

        # Side-by-side detail, optionally capped to keep the output readable
        shown = mismatches if max_mismatches_shown is None else mismatches[:max_mismatches_shown]
        for idx in shown:
            print(f"  Index {idx}:")
            print(f"    Reference: {reference_statements[idx]}")
            print(f"    Current:   {statements[idx]}")
        if len(shown) < len(mismatches):
            print(f"  ... {len(mismatches) - len(shown)} more mismatches not shown")
        

# First dataset family: compare the yes/no files with the raw statements in `hate_data`.
check_sentence_order_consistency({"hate_data": datasets['hate_data'], "hate_total_yes_data": datasets['hate_total_yes_data'], "hate_total_no_data": datasets['hate_total_no_data']}, 'hate_data')



# Second ("3.0") dataset family, including the variants where 'not' was replaced by 'ttt'.
check_sentence_order_consistency({"hate_data_not": datasets['hate_data_not'], "hate_total_yes_data_not": datasets['hate_total_yes_data_not'], "hate_total_no_data_not": datasets['hate_total_no_data_not'], "hate_total_yes_data_not_ttt": datasets['hate_total_yes_data_not_ttt'], "hate_total_no_data_not_ttt": datasets['hate_total_no_data_not_ttt']}, 'hate_data_not')



=== SENTENCE ORDER CONSISTENCY CHECK ===
Using hate_data as reference (1244 statements)

First 3 reference core statements from hate_data:
  0: homosexuals want to have sex with as many different men as possible
  1: violence against women is a migrant problem
  2: blacks are just degenerates

Comparing hate_total_yes_data:
Length: 1244 (vs reference: 1244)
First 3 core statements from hate_total_yes_data:
  0: homosexuals want to have sex with as many different men as possible
  1: violence against women is a migrant problem
  2: blacks are just degenerates
✅ Order matches reference

Comparing hate_total_no_data:
Length: 1244 (vs reference: 1244)
First 3 core statements from hate_total_no_data:
  0: homosexuals want to have sex with as many different men as possible
  1: violence against women is a migrant problem
  2: blacks are just degenerates
✅ Order matches reference

=== SENTENCE ORDER CONSISTENCY CHECK ===
Using hate_data_not as reference (1250 statements)

First 3 reference c

In [6]:
import re
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from difflib import SequenceMatcher
from tqdm import tqdm
import nltk
from collections import defaultdict

# Add lemmatization support
from nltk.stem import WordNetLemmatizer

def setup_nltk_data():
    """Download the NLTK resources used here and create the shared globals.

    Quietly downloads WordNet, both punkt tokenizer packages, the stopword
    lists and OMW 1.4, then binds the module-level ``stop_words`` (set of
    English stopwords) and ``lemmatizer`` (a ``WordNetLemmatizer``).
    """
    global stop_words, lemmatizer

    # 'punkt_tab' is the new tokenizer format; 'omw-1.4' is for the lemmatizer
    for resource in ('wordnet', 'punkt', 'punkt_tab', 'stopwords', 'omw-1.4'):
        nltk.download(resource, quiet=True)

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

# Fallback tokenizer that doesn't require NLTK punkt
def simple_word_tokenize(text):
    """Tokenize without NLTK: punctuation becomes a space, then split on whitespace."""
    # Anything that is neither a word character nor whitespace counts as punctuation
    punctuation = re.compile(r'[^\w\s]')
    return punctuation.sub(' ', text).split()

def safe_word_tokenize(text):
    """Tokenize ``text`` with NLTK's ``word_tokenize``.

    NOTE(review): no fallback is implemented here; ``simple_word_tokenize`` is
    not called, so any error raised by ``word_tokenize`` propagates.
    """
    tokens = word_tokenize(text)
    return tokens

def lemmatize_word(word):
    """Return the shortest lemma of ``word`` over its noun, verb and adjective readings.

    Uses the module-level ``lemmatizer``. When several readings are equally
    short, the earliest one in the order noun, verb, adjective is returned.
    """
    candidates = [lemmatizer.lemmatize(word, pos) for pos in ('n', 'v', 'a')]
    return min(candidates, key=len)

# Setup NLTK data: binds the `stop_words` and `lemmatizer` globals that the
# helper functions in this cell read.
setup_nltk_data()

def get_synonyms_wordnet(word):
    """Collect the WordNet synonyms of ``word``.

    Every lemma name of every synset of ``word`` is lowercased, with
    underscores turned into spaces. The word itself (compared in lowercase) is
    left out. The result is a list without duplicates, in no particular order.
    """
    unique_names = {
        name
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
        for name in [lemma.name().replace('_', ' ').lower()]
        if name != word.lower()  # Don't include the word itself
    }
    return list(unique_names)

def create_dynamic_synonym_mapping(sentences):
    """
    Build a ``word -> canonical form`` mapping for the vocabulary of ``sentences``.

    Steps:
      1. Collect the vocabulary: every lowercased token that is non-empty after
         punctuation removal, is not a stopword and is longer than two
         characters, plus its lemmatized form.
      2. Map the vocabulary words that occur in the hand-written bias-specific
         groups to the canonical term of the first group that lists them.
      3. Group the remaining words with their WordNet synonyms that are also in
         the vocabulary; a word without such a synonym maps to its lemma.

    The result is deterministic: step 3 walks the vocabulary in sorted order and
    breaks length ties alphabetically when choosing a canonical form. Iterating
    the vocabulary set directly made the grouping depend on set iteration
    order, which changes between interpreter runs for strings.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        dict mapping each vocabulary word to its canonical form.
    """
    # Extract all meaningful words from sentences, with lemmatization
    all_words = set()
    for sentence in sentences:
        for token in safe_word_tokenize(sentence.lower()):
            word_clean = re.sub(r'[^\w\s]', '', token)
            if word_clean and word_clean not in stop_words and len(word_clean) > 2:
                # Keep both the original and the lemmatized form
                all_words.add(word_clean)
                all_words.add(lemmatize_word(word_clean))

    # Bias-specific synonym groups that WordNet doesn't cover
    bias_specific_synonyms = {
        # Sexual orientation slurs and terms
        'gay': ['fag', 'faggot', 'fags', 'homosexual', 'homosexuals', 'queer', 'lgbtq', 'gay'],

        # Racial slurs and terms (mapping to neutral terms)
        'black': ['nigger', 'niggers', 'african', 'dark'],
        'people': ['person', 'individual', 'individuals'],

        # Gender terms
        'women': ['woman', 'female', 'females', 'girl', 'girls', 'lady', 'ladies'],
        'men': ['man', 'male', 'males', 'boy', 'boys', 'guy', 'guys'],

        # Weight-related terms
        'overweight': ['fat', 'obese', 'heavy'],
        'thin': ['skinny', 'slim', 'lean'],

        # Intelligence terms
        'intelligent': ['smart', 'clever', 'bright', 'brilliant'],
        'unintelligent': ['stupid', 'dumb', 'foolish', 'ignorant'],

        # Wealth terms
        'wealthy': ['rich', 'affluent'],
        'poor': ['impoverished', 'broke'],

        # Immigration terms (neutral mapping)
        'migration': ['immigration', 'movement', 'invasion', 'influx'],
        'migrants': ['immigrants', 'foreigners', 'newcomers', 'refugees'],
        'migrate': ['immigrate', 'move', 'invade'],

        # Ability/disability terms
        'disabled': ['handicapped', 'impaired'],
        'able': ['capable', 'fit'],

        # Appearance terms
        'attractive': ['beautiful', 'pretty', 'handsome', 'good-looking'],
        'unattractive': ['ugly', 'homely'],

        # Action/intention terms
        'deliberately': ['intentionally', 'purposely', 'consciously'],
        'accidentally': ['unintentionally', 'mistakenly'],

        # Modal verbs
        'should': ['must', 'ought', 'need'],
        'can': ['able', 'capable'],
        'may': ['might', 'could'],

        # Quality terms
        'good': ['excellent', 'great', 'fine', 'positive'],
        'bad': ['terrible', 'awful', 'poor', 'negative'],

        # Authority/leadership
        'leadership': ['management', 'authority', 'control', 'command'],
        'leader': ['manager', 'boss', 'chief', 'head'],

        # Emotional/behavioral terms
        'violent': ['aggressive', 'hostile'],
        'peaceful': ['calm', 'gentle'],
        'angry': ['mad', 'furious', 'upset'],

        # Relationship terms
        'parent': ['father', 'mother', 'dad', 'mom'],
        'child': ['kid', 'children', 'son', 'daughter'],
        'family': ['household', 'relatives'],

        # Discrimination terms
        'discrimination': ['bias', 'prejudice', 'racism', 'sexism'],
        'equality': ['fairness', 'justice', 'equal'],
        'superior': ['better', 'higher', 'greater'],
        'inferior': ['worse', 'lower', 'lesser'],
    }

    word_to_canonical = {}
    processed_words = set()

    # First, apply the bias-specific groups. A word listed in several groups
    # (e.g. 'able', 'poor') keeps the canonical term of the first one.
    for canonical, synonym_list in bias_specific_synonyms.items():
        for word in [canonical] + synonym_list:
            if word in all_words and word not in word_to_canonical:
                word_to_canonical[word] = canonical
                processed_words.add(word)

    # Then process the remaining words with WordNet. Sorted iteration keeps the
    # grouping independent of set ordering.
    for word in sorted(all_words):
        if word in processed_words:
            continue

        # WordNet synonyms of the word, each also in lemmatized form
        expanded_synonyms = set()
        for synonym in get_synonyms_wordnet(word):
            expanded_synonyms.add(synonym)
            expanded_synonyms.add(lemmatize_word(synonym))

        # The word itself plus every synonym that is in the vocabulary and unclaimed
        relevant_synonyms = {word} | {
            syn for syn in expanded_synonyms
            if syn in all_words and syn not in processed_words
        }

        if len(relevant_synonyms) > 1:
            # Shortest word is the canonical form (usually the lemma); ties are
            # broken alphabetically so the choice is reproducible.
            canonical = min(relevant_synonyms, key=lambda candidate: (len(candidate), candidate))
            for syn_word in sorted(relevant_synonyms):
                if syn_word not in word_to_canonical:
                    word_to_canonical[syn_word] = canonical
                    processed_words.add(syn_word)
        else:
            # Single word, map to its lemmatized form
            word_to_canonical[word] = lemmatize_word(word)
            processed_words.add(word)

    return word_to_canonical

def normalize_with_synonyms(sentence, synonym_map):
    """
    Normalize a sentence so that paraphrases and negated variants compare equal.

    Steps:
      1. Lowercase and trim.
      2. Turn the ``ttt`` placeholder back into ``not`` so that placeholder and
         plain variants go through the same negation handling.
      3. Strip negations (``not``, ``cannot`` and common contractions).
      4. Collapse a few phrases (``able to be``, ``capable of being``,
         ``to be``, ``good at``).
      5. Tokenize; replace every token by its canonical form from
         ``synonym_map``, or by its lemma when it is not in the map, is not a
         stopword and is longer than two characters. Other tokens are dropped.

    Args:
        sentence: The sentence to normalize.
        synonym_map: dict ``word -> canonical form``.

    Returns:
        The normalized words joined by single spaces (possibly an empty string).
    """
    sentence = sentence.lower().strip()

    # Restore the placeholder before the negation rules run. The sentence is
    # already lowercase, so a separate upper-case 'TTT' rule is not needed.
    sentence = sentence.replace('ttt', 'not')

    # Remove negations. The bare `not` rule runs first, so the more specific
    # `<verb> not` rules after it only matter for the contractions.
    negation_patterns = [
        (r'\bnot\s+', ''), (r'\bdoes\s+not\s+', 'does '), (r'\bdo\s+not\s+', 'do '),
        (r'\bcan\s+not\s+', 'can '), (r'\bcannot\s+', 'can '), (r'\baren\'t\s+', 'are '),
        (r'\bisn\'t\s+', 'is '), (r'\bwon\'t\s+', 'will '), (r'\bdon\'t\s+', 'do '),
        (r'\bdoesn\'t\s+', 'does '), (r'\bcan\'t\s+', 'can '), (r'\bshouldn\'t\s+', 'should '),
        (r'\bwouldn\'t\s+', 'would '), (r'\bare\s+not\s+', 'are '), (r'\bis\s+not\s+', 'is '),
        (r'\bshould\s+not\s+', 'should '), (r'\bwill\s+not\s+', 'will '), (r'\bwould\s+not\s+', 'would '),
        (r'\bhave\s+not\s+', 'have '), (r'\bhas\s+not\s+', 'has '), (r'\bhaven\'t\s+', 'have '),
        (r'\bhasn\'t\s+', 'has '),
    ]
    for pattern, replacement in negation_patterns:
        sentence = re.sub(pattern, replacement, sentence)

    # Phrase-level replacements for better semantic matching. Longer phrases
    # come first: 'able to be' must be handled before the generic 'to be',
    # otherwise it is rewritten to 'able be' and never matches.
    special_replacements = [
        (r'\bable to be\b', 'be'),
        (r'\bcapable of being\b', 'be'),
        (r'\bto be\b', 'be'),
        (r'\bgood at\b', 'good'),
    ]
    for pattern, replacement in special_replacements:
        sentence = re.sub(pattern, replacement, sentence)

    # Tokenize and replace with canonical forms
    normalized_words = []
    for token in safe_word_tokenize(sentence):
        word_clean = re.sub(r'[^\w\s]', '', token.lower())
        if word_clean in synonym_map:
            normalized_words.append(synonym_map[word_clean])
        elif word_clean not in stop_words and len(word_clean) > 2:
            # Use the lemmatized form (or its canonical form) if not in the map
            lemmatized = lemmatize_word(word_clean)
            normalized_words.append(synonym_map.get(lemmatized, lemmatized))

    return ' '.join(normalized_words)

def calculate_tfidf_similarity(sentences1, sentences2):
    """
    Cosine similarity between the TF-IDF vectors of position-aligned sentences.

    One vectorizer (unigrams and bigrams, English stop words removed) is fitted
    on both collections together, then ``sentences1[i]`` is compared with
    ``sentences2[i]``.

    Args:
        sentences1: Sequence of sentences.
        sentences2: Sequence of sentences aligned with ``sentences1``.

    Returns:
        List with one similarity per element of ``sentences1``; an empty list
        when ``sentences1`` is empty.

    Raises:
        ValueError: Propagated from the vectorizer, e.g. when no term is left
            after stop-word removal.
    """
    # list() so tuples and other sequences are concatenated, not added element-wise
    sentences1 = list(sentences1)
    sentences2 = list(sentences2)
    if not sentences1:
        return []

    # Combine all sentences for vocabulary building
    all_sentences = sentences1 + sentences2

    # max_df=0.95 drops terms found in more than 95% of the documents. With a
    # single pair (2 documents) that removes every term the two sentences share,
    # which is exactly the signal being measured, so the cap is only applied to
    # larger collections.
    max_df = 0.95 if len(all_sentences) > 2 else 1.0

    vectorizer = TfidfVectorizer(
        lowercase=True,
        stop_words='english',
        ngram_range=(1, 2),  # Include bigrams for better context
        max_features=10000,  # Limit features to prevent memory issues
        min_df=1,  # Minimum document frequency
        max_df=max_df  # Maximum document frequency
    )

    # Fit and transform all sentences, then split back into the two sets
    tfidf_matrix = vectorizer.fit_transform(all_sentences)
    n_sent1 = len(sentences1)
    tfidf1 = tfidf_matrix[:n_sent1]
    tfidf2 = tfidf_matrix[n_sent1:]

    # Row-by-row cosine similarity of the aligned pairs
    return [
        cosine_similarity(tfidf1[i], tfidf2[i])[0, 0]
        for i in range(n_sent1)
    ]

def calculate_enhanced_string_similarity(sent1, sent2):
    """Blend three case-insensitive string similarities into one score.

    The score is ``0.5 * SequenceMatcher ratio + 0.3 * word Jaccard +
    0.2 * character-trigram Jaccard``. For both Jaccard parts, two empty sets
    count as identical (1.0) and exactly one empty set as disjoint (0.0).
    """
    def jaccard(left, right):
        # Set overlap with the empty-set conventions described above
        if not left and not right:
            return 1.0
        if not left or not right:
            return 0.0
        return len(left & right) / len(left | right)

    def char_trigrams(text):
        return {text[start:start + 3] for start in range(len(text) - 2)}

    lowered1 = sent1.lower()
    lowered2 = sent2.lower()

    # Method 1: SequenceMatcher ratio
    seq_sim = SequenceMatcher(None, lowered1, lowered2).ratio()

    # Method 2: Word-level Jaccard similarity
    jaccard_sim = jaccard(set(lowered1.split()), set(lowered2.split()))

    # Method 3: Character n-gram similarity
    ngram_sim = jaccard(char_trigrams(lowered1), char_trigrams(lowered2))

    return 0.5 * seq_sim + 0.3 * jaccard_sim + 0.2 * ngram_sim

def calculate_order_independent_similarity(sent1, sent2):
    """Word-order-insensitive similarity of two sentences.

    Both sentences are lowercased and split on whitespace into word sets. The
    score is ``0.4 * Jaccard + 0.4 * cosine (binary word vectors) +
    0.2 * overlap coefficient``. When neither sentence contains a word the
    result is 0.0; when only one of them is empty all three parts are 0.0.
    """
    words1 = set(sent1.lower().split())
    words2 = set(sent2.lower().split())

    vocabulary = list(words1 | words2)
    if not vocabulary:
        return 0.0

    shared = len(words1 & words2)

    # Jaccard similarity and overlap coefficient (overlap is relative to the smaller set)
    if words1 and words2:
        jaccard_sim = shared / len(vocabulary)
        overlap_coeff = shared / min(len(words1), len(words2))
    else:
        jaccard_sim = 0.0
        overlap_coeff = 0.0

    # Cosine similarity on binary word-presence vectors
    vec1 = np.array([int(word in words1) for word in vocabulary])
    vec2 = np.array([int(word in words2) for word in vocabulary])
    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    if norm1 == 0 or norm2 == 0:
        cosine_sim = 0.0
    else:
        cosine_sim = np.dot(vec1, vec2) / (norm1 * norm2)

    return 0.4 * jaccard_sim + 0.4 * cosine_sim + 0.2 * overlap_coeff

def calculate_comprehensive_similarity(sent1, sent2, synonym_map):
    """Weighted blend of similarity measures on the raw and the normalized sentences.

    Weights: raw TF-IDF 0.15, normalized TF-IDF 0.25, normalized string 0.15,
    normalized order-independent 0.2, raw string 0.1, raw order-independent 0.1,
    exact normalized match 0.05. The three normalized measures are 0.0 when
    either normalized sentence is empty.
    """
    # TF-IDF on the sentences as given
    tfidf_sim = calculate_tfidf_similarity([sent1], [sent2])[0]

    # Synonym-normalized versions of both sentences
    norm1 = normalize_with_synonyms(sent1, synonym_map)
    norm2 = normalize_with_synonyms(sent2, synonym_map)
    core1 = norm1.strip()
    core2 = norm2.strip()

    # Measures on the normalized text; they stay at zero unless both sides have content
    normalized_tfidf_sim = 0.0
    normalized_string_sim = 0.0
    normalized_order_independent = 0.0
    if core1 and core2:
        normalized_tfidf_sim = calculate_tfidf_similarity([norm1], [norm2])[0]
        normalized_string_sim = calculate_enhanced_string_similarity(norm1, norm2)
        normalized_order_independent = calculate_order_independent_similarity(norm1, norm2)

    # Bonus when the normalized texts are identical and non-empty
    exact_match_bonus = 1.0 if (core1 == core2 and core1) else 0.0

    # Measures on the original text
    original_string_sim = calculate_enhanced_string_similarity(sent1, sent2)
    original_order_independent = calculate_order_independent_similarity(sent1, sent2)

    return (
        0.15 * tfidf_sim +
        0.25 * normalized_tfidf_sim +
        0.15 * normalized_string_sim +
        0.2 * normalized_order_independent +
        0.1 * original_string_sim +
        0.1 * original_order_independent +
        0.05 * exact_match_bonus
    )

def extract_key_concepts_smart(sentence, synonym_map):
    """Return the set of topic keywords found in the normalized sentence.

    The sentence is first passed through ``normalize_with_synonyms``; each
    keyword pattern is then matched case-insensitively and the matches are
    collected in lowercase.
    """
    normalized = normalize_with_synonyms(sentence, synonym_map)

    # One alternation per topic area
    concept_patterns = (
        r'\b(sexual orientation|gay|lesbian|lgbtq|transgender|trans|bisexual|queer)\b',
        r'\b(fat|obese|overweight|thin|skinny)\b', 
        r'\b(healthy|health|unhealthy|fit|wellness)\b',
        r'\b(parent|parenting|children|kids|child|good parent|childrearing)\b', 
        r'\b(leadership|leader|lead|command|control|suited|management|guidance)\b',
        r'\b(muslim|islam|arab|jewish|christian|religious|atheist)\b',
        r'\b(black|white|asian|hispanic|latino|indigenous|african|races|race)\b',
        r'\b(women|men|girls|boys|female|male|ladies|guys)\b', 
        r'\b(disabled|disability|wheelchair|deaf|blind|autism|adhd)\b',
        r'\b(immigrant|refugee|foreigner|migrants|newcomers|migration|immigration)\b', 
        r'\b(poor|rich|wealthy|poverty|homeless)\b',
        r'\b(terrorist|crime|criminal|violence|dangerous)\b', 
        r'\b(smart|intelligent|stupid|dumb|education)\b',
        r'\b(work|employment|job|unemployed)\b', 
        r'\b(beautiful|attractive|ugly|appearance)\b',
        r'\b(equal|equality|superior|worth|same|equivalent|better|worse)\b',
    )

    return {
        match.lower()
        for pattern in concept_patterns
        for match in re.findall(pattern, normalized, re.IGNORECASE)
    }

def calculate_batch_similarities(sentences1, sentences2, batch_size=100):
    """Run ``calculate_tfidf_similarity`` over aligned slices of ``batch_size`` sentences.

    Each slice is scored on its own and the per-slice results are concatenated
    in order, giving one similarity per element of ``sentences1``.
    """
    total = len(sentences1)
    similarities = []

    for start in range(0, total, batch_size):
        # The last slice may be shorter than batch_size
        stop = min(start + batch_size, total)
        similarities += calculate_tfidf_similarity(
            sentences1[start:stop], sentences2[start:stop]
        )

    return similarities

def check_sentence_order_consistency_smart_nltk(datasets, reference_name, match_threshold=0.1):
    """Check that every dataset lists its statements in the same order as a reference.

    For each dataset other than the reference, the i-th statement is compared
    with the i-th reference statement. A hybrid score is built from six
    components: direct TF-IDF, TF-IDF on synonym-normalized text, an enhanced
    string similarity, two order-independent similarities (original and
    normalized text) and an exact-match bonus on the normalized text. Pairs
    scoring below ``match_threshold`` are reported in detail.

    Args:
        datasets: Mapping of dataset name -> DataFrame with a 'statement' column.
        reference_name: Key of the dataset to compare against. If it is not in
            ``datasets`` the first key is used instead (a warning is printed).
        match_threshold: Hybrid-score cut-off; pairs strictly below it are
            reported as poor matches. Defaults to 0.1.

    Returns:
        Sorted list of the row positions that were a poor match in at least one
        compared dataset. Empty when nothing was flagged or nothing could be
        compared. With a single compared dataset this is exactly that
        dataset's list of poor indices.
    """
    print("🤖 SMART NLTK-BASED SENTENCE ORDER CONSISTENCY CHECK")
    print("="*70)
    print("🔧 Using: NLTK WordNet synonyms + scikit-learn TF-IDF")

    # Fall back to the first dataset when the requested reference is missing,
    # but say so instead of silently comparing against something else.
    if reference_name not in datasets:
        fallback_name = list(datasets.keys())[0]
        print(f"⚠️ Reference '{reference_name}' not found; using '{fallback_name}' instead")
        reference_name = fallback_name

    reference_statements = datasets[reference_name]['statement'].tolist()
    total_statements = len(reference_statements)

    # Build dynamic synonym mapping from all sentences in the reference dataset
    print("🔍 Building dynamic synonym mapping using NLTK WordNet...")
    synonym_map = create_dynamic_synonym_mapping(reference_statements)
    print(f"📊 Created synonym mappings for {len(synonym_map)} words")

    # The normalized reference does not depend on the compared dataset,
    # so compute it once instead of once per dataset.
    norm_ref = [normalize_with_synonyms(s, synonym_map) for s in reference_statements]

    # Union of poor positions over every compared dataset (the return value)
    all_poor_indices = set()

    for name, df in datasets.items():
        if name == reference_name:
            continue

        current_statements = df['statement'].tolist()

        print(f"\n🔄 Analyzing {name} with NLTK-enhanced similarity:")

        # Position-wise comparison is meaningless when the lengths differ
        if len(current_statements) != len(reference_statements):
            print(f"🔴 Length mismatch! ({len(current_statements)} vs {len(reference_statements)})")
            continue

        print("🚀 Processing TF-IDF similarities in batches...")

        # Component 1: TF-IDF on the raw statements
        direct_similarities = calculate_batch_similarities(
            reference_statements, current_statements, batch_size=200
        )

        # Component 2: TF-IDF on the synonym-normalized statements
        print("🔄 Processing NLTK-normalized similarities...")
        norm_curr = [normalize_with_synonyms(s, synonym_map) for s in current_statements]

        # Only pairs where both normalized sentences are non-blank are scored
        valid_indices = [
            i for i, (nr, nc) in enumerate(zip(norm_ref, norm_curr))
            if nr.strip() and nc.strip()
        ]
        if valid_indices:
            valid_norm_similarities = calculate_batch_similarities(
                [norm_ref[i] for i in valid_indices],
                [norm_curr[i] for i in valid_indices],
                batch_size=200,
            )
        else:
            valid_norm_similarities = []
        # Position -> normalized TF-IDF score; avoids repeated list scans.
        # Positions without a score fall back to 0.0 below.
        norm_sim_by_index = dict(zip(valid_indices, valid_norm_similarities))

        # Component 3: enhanced string similarity
        print("📝 Processing enhanced string similarities...")
        string_similarities = [
            calculate_enhanced_string_similarity(
                reference_statements[i], current_statements[i]
            )
            for i in tqdm(range(total_statements), desc="String similarities")
        ]

        # Combine all components into the hybrid score, keeping the
        # order-independent parts so the report below can reuse them.
        order_indep_orig_sims = []
        order_indep_norm_sims = []
        final_similarities = []

        for i in range(total_statements):
            both_normalized = bool(norm_ref[i].strip() and norm_curr[i].strip())

            order_indep_original = calculate_order_independent_similarity(
                reference_statements[i], current_statements[i]
            )
            order_indep_normalized = calculate_order_independent_similarity(
                norm_ref[i], norm_curr[i]
            ) if both_normalized else 0.0
            order_indep_orig_sims.append(order_indep_original)
            order_indep_norm_sims.append(order_indep_normalized)

            # Bonus when the normalized sentences are identical and non-blank
            exact_bonus = 1.0 if both_normalized and norm_ref[i] == norm_curr[i] else 0.0

            final_similarities.append(
                0.15 * direct_similarities[i] +
                0.25 * norm_sim_by_index.get(i, 0.0) +
                0.15 * string_similarities[i] +
                0.2 * order_indep_normalized +
                0.15 * order_indep_original +
                0.1 * exact_bonus
            )

        # Categorize results against the threshold
        perfect_matches = sum(1 for sim in final_similarities if sim >= match_threshold)
        poor_matches = [
            (i, sim, reference_statements[i], current_statements[i])
            for i, sim in enumerate(final_similarities)
            if sim < match_threshold
        ]

        # Show statistics (guard the percentage against two empty datasets)
        denominator = total_statements or 1
        print(f"🎯 Perfect matches (≥{match_threshold}): {perfect_matches}/{total_statements} ({perfect_matches/denominator*100:.1f}%)")
        print(f"🔴 Poor matches (<{match_threshold}): {len(poor_matches)}/{total_statements} ({len(poor_matches)/denominator*100:.1f}%)")

        if not poor_matches:
            continue

        # Show poor matches with detailed analysis
        print("\n🔴 DETAILED ANALYSIS OF POOR MATCHES:")
        for idx, sim, ref, curr in poor_matches:
            print(f"Index {idx:4d} | Similarity: {sim:.3f}")
            print(f"  📚 Reference: {ref}")
            print(f"  📝 Current:   {curr}")

            # Normalized versions using NLTK synonyms
            print(f"  🔄 NLTK-normalized ref: {norm_ref[idx]}")
            print(f"  🔄 NLTK-normalized curr: {norm_curr[idx]}")

            # Individual similarity components
            direct_emb = direct_similarities[idx]
            norm_emb = norm_sim_by_index.get(idx, 0.0)
            string_sim = string_similarities[idx]
            order_indep_orig = order_indep_orig_sims[idx]
            order_indep_norm = order_indep_norm_sims[idx]

            print(f"  📊 Direct TF-IDF: {direct_emb:.3f}, Normalized TF-IDF: {norm_emb:.3f}, String: {string_sim:.3f}")
            print(f"  🔄 Order-independent (orig): {order_indep_orig:.3f}, Order-independent (norm): {order_indep_norm:.3f}")

            # Show the synonym-map entries that apply to either sentence
            ref_words = set(safe_word_tokenize(ref.lower()))
            curr_words = set(safe_word_tokenize(curr.lower()))
            ref_synonyms = {word: synonym_map[word] for word in ref_words if word in synonym_map}
            curr_synonyms = {word: synonym_map[word] for word in curr_words if word in synonym_map}
            if ref_synonyms or curr_synonyms:
                print(f"  🔗 Ref synonyms: {ref_synonyms}")
                print(f"  🔗 Curr synonyms: {curr_synonyms}")
            print()

        # Summary of problematic indices for this dataset
        print("📊 INDICES SUMMARY:")
        poor_indices = [idx for idx, _, _, _ in poor_matches]
        print(f"🔴 Poor indices: {poor_indices}")
        all_poor_indices.update(poor_indices)

    # Returned after ALL datasets were analysed; always a list, never unbound
    return sorted(all_poor_indices)

# Second dataset analysis: compare the first 625 rows of 'hate_data_not'
# position by position against the remaining rows, which act as the reference.
poor_indices = check_sentence_order_consistency_smart_nltk(
    datasets={
        "hate_data_not_0": datasets['hate_data_not'][:625],
        "hate_data_not_1": datasets['hate_data_not'][625:],
    },
    reference_name='hate_data_not_1',
)

🤖 SMART NLTK-BASED SENTENCE ORDER CONSISTENCY CHECK
🔧 Using: NLTK WordNet synonyms + scikit-learn TF-IDF
🔍 Building dynamic synonym mapping using NLTK WordNet...
📊 Created synonym mappings for 1341 words

🔄 Analyzing hate_data_not_0 with NLTK-enhanced similarity:
🚀 Processing TF-IDF similarities in batches...
🔄 Processing NLTK-normalized similarities...
📝 Processing enhanced string similarities...


String similarities: 100%|██████████| 625/625 [00:00<00:00, 21387.29it/s]

🎯 Perfect matches (≥0.1): 602/625 (96.3%)
🔴 Poor matches (<0.1): 23/625 (3.7%)

🔴 DETAILED ANALYSIS OF POOR MATCHES:
Index   28 | Similarity: 0.026
  📚 Reference: Intelligence is unrelated to appearance.
  📝 Current:   Unattractive people are not intelligent.
  🔄 NLTK-normalized ref: intelligence unrelated appearance
  🔄 NLTK-normalized curr: unattractive people intelligent
  📊 Direct TF-IDF: 0.000, Normalized TF-IDF: 0.000, String: 0.174
  🔄 Order-independent (orig): 0.000, Order-independent (norm): 0.000
  🔗 Ref synonyms: {'intelligence': 'intelligence', 'appearance': 'appearance', 'unrelated': 'unrelated'}
  🔗 Curr synonyms: {'intelligent': 'intelligent', 'people': 'people'}

Index   79 | Similarity: 0.019
  📚 Reference: Intelligence comes in many forms regardless of appearance.
  📝 Current:   Attractive people are not necessarily intelligent.
  🔄 NLTK-normalized ref: intelligence come many form regardless appearance
  🔄 NLTK-normalized curr: attractive people needs intelligent
  📊 




In [7]:


# # First dataset analysis
# check_sentence_order_consistency_smart_nltk(
#     {
#         "hate_data_0": datasets['hate_data'][:622], 
#         "hate_data_1": datasets['hate_data'][622:],
#         "hate_total_yes_data_0": datasets['hate_total_yes_data'][:622],
#         "hate_total_yes_data_1": datasets['hate_total_yes_data'][622:],
#         "hate_total_no_data_0": datasets['hate_total_no_data'][:622],
#         "hate_total_no_data_1": datasets['hate_total_no_data'][622:]
#     }, 'hate_data_0'
# )

# Second dataset analysis
# check_sentence_order_consistency_smart_nltk(
#     {
#         "hate_data_not_0": datasets['hate_data_not'][:625], 
#         "hate_data_not_1": datasets['hate_data_not'][625:],
#         "hate_total_yes_data_not_0": datasets['hate_total_yes_data_not'][:625],
#         "hate_total_yes_data_not_1": datasets['hate_total_yes_data_not'][625:],
#         "hate_total_no_data_not_0": datasets['hate_total_no_data_not'][:625],
#         "hate_total_no_data_not_1": datasets['hate_total_no_data_not'][625:],
#         "hate_total_yes_data_not_ttt_0": datasets['hate_total_yes_data_not_ttt'][:625],
#         "hate_total_yes_data_not_ttt_1": datasets['hate_total_yes_data_not_ttt'][625:],
#         "hate_total_no_data_not_ttt_0": datasets['hate_total_no_data_not_ttt'][:625],
#         "hate_total_no_data_not_ttt_1": datasets['hate_total_no_data_not_ttt'][625:]
#     }, 'hate_data_not_0'
# )


In [12]:

def count_not_in_labels(datasets):
    """Print, per dataset and per label, how many statements contain the word 'not'.

    'not' is matched as a whole word, case-insensitively. Datasets whose name
    contains 'ttt' are announced and skipped. Rows are split on the
    'is_harmfull_opposition' column: 0 is reported as "harmful", 1 as "safe".

    Args:
        datasets: Mapping of dataset name -> DataFrame with 'statement' and
            'is_harmfull_opposition' columns.

    Returns:
        None. The report is written to stdout.
    """
    # Compile once instead of re-parsing the pattern for every statement
    not_pattern = re.compile(r'\bnot\b', re.IGNORECASE)

    def _count_not(statements):
        # Non-string cells (e.g. NaN for a missing statement) cannot contain
        # the word and would make re raise, so they are simply not counted.
        return sum(
            1 for stmt in statements
            if isinstance(stmt, str) and not_pattern.search(stmt)
        )

    def _percent(part, whole):
        # A label with no rows reports 0.0% instead of dividing by zero
        return part / whole * 100 if whole else 0.0

    print("\n=== 'NOT' WORD ANALYSIS BY LABELS ===")

    for name, df in datasets.items():
        print(f"\nDataset: {name}")

        # Skip datasets with 'ttt' replacement
        if 'ttt' in name:
            print("Skipping dataset with 'ttt' replacement")
            continue

        label_0_statements = df.loc[df['is_harmfull_opposition'] == 0, 'statement']
        label_1_statements = df.loc[df['is_harmfull_opposition'] == 1, 'statement']

        label_0_not_count = _count_not(label_0_statements)
        label_1_not_count = _count_not(label_1_statements)

        print(f"Label 0 (harmful): {len(label_0_statements)} total, {label_0_not_count} contain 'not' ({_percent(label_0_not_count, len(label_0_statements)):.1f}%)")
        print(f"Label 1 (safe): {len(label_1_statements)} total, {label_1_not_count} contain 'not' ({_percent(label_1_not_count, len(label_1_statements)):.1f}%)")

# Print the per-label 'not' statistics for every entry in `datasets`
# (entries whose name contains 'ttt' are skipped by the function).
count_not_in_labels(datasets)


=== 'NOT' WORD ANALYSIS BY LABELS ===

Dataset: hate_data
Label 0 (harmful): 622 total, 37 contain 'not' (5.9%)
Label 1 (safe): 622 total, 290 contain 'not' (46.6%)

Dataset: hate_total_yes_data
Label 0 (harmful): 622 total, 37 contain 'not' (5.9%)
Label 1 (safe): 622 total, 290 contain 'not' (46.6%)

Dataset: hate_total_no_data
Label 0 (harmful): 622 total, 37 contain 'not' (5.9%)
Label 1 (safe): 622 total, 290 contain 'not' (46.6%)

Dataset: hate_data_not
Label 0 (harmful): 625 total, 320 contain 'not' (51.2%)
Label 1 (safe): 625 total, 297 contain 'not' (47.5%)

Dataset: hate_total_yes_data_not
Label 0 (harmful): 625 total, 320 contain 'not' (51.2%)
Label 1 (safe): 625 total, 297 contain 'not' (47.5%)

Dataset: hate_total_no_data_not
Label 0 (harmful): 625 total, 320 contain 'not' (51.2%)
Label 1 (safe): 625 total, 297 contain 'not' (47.5%)

Dataset: hate_total_yes_data_not_ttt
Skipping dataset with 'ttt' replacement

Dataset: hate_total_no_data_not_ttt
Skipping dataset with 'ttt' 