In [1]:
# ===== IMPROVED ADVERSARIAL AI DETECTION ROBUSTNESS PIPELINE =====
import pandas as pd
import random
import re
import os
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from deep_translator import GoogleTranslator
import string
from typing import List, Tuple, Dict
import numpy as np

In [2]:
# ===== NLTK Setup (Silent) =====
required_nltk = ['punkt', 'punkt_tab', 'wordnet', 'omw-en', 'averaged_perceptron_tagger']
for resource in required_nltk:
    try:
        if resource in ['punkt', 'punkt_tab']:
            nltk.data.find(f'tokenizers/{resource}')
        elif resource == 'averaged_perceptron_tagger':
            nltk.data.find('taggers/averaged_perceptron_tagger')
        else:
            nltk.data.find(f'corpora/{resource}')
    except LookupError:
        nltk.download(resource, quiet=True)

# ===== Groq API Setup =====
try:
    from groq import Groq
    api_key = os.getenv("GROQ_API_KEY", "your_api_key_here")
    if api_key == "your_api_key_here":
        print("Warning: Please set your Groq API key")
        groq_client = None
    else:
        groq_client = Groq(api_key=api_key)
        print("Groq API initialized successfully")
except Exception as e:
    groq_client = None
    print(f"Warning: Groq API not available: {e}")
    print("Pipeline will work with rule-based transformations only")

[nltk_data] Error loading omw-en: Package 'omw-en' not found in index


Groq API initialized successfully


In [3]:
# ===== REALISTIC GENRE-SPECIFIC PATTERNS =====
HUMANIZATION_PATTERNS = {
    'education': {
        'personal_academic': ["Based on my understanding,", "From what I've learned,", "In my studies,", "From my research,", "I believe that"],
        'uncertainty_academic': ["it seems that", "appears to be", "might indicate", "could suggest", "likely demonstrates"],
        'conversational_academic': ["essentially", "basically", "in other words", "put simply", "to clarify"],
        'hedging': ["somewhat", "rather", "quite", "fairly", "relatively"],
        'typos': [("separate", "seperate"), ("definitely", "definately"), ("occurrence", "occurence"), ("receive", "recieve")]
    },
    'creative': {
        'personal_expression': ["I find that", "What strikes me is", "I've always felt", "In my experience", "Personally, I think"],
        'emotional_language': ["beautifully", "powerfully", "deeply", "profoundly", "remarkably"],
        'conversational_flow': ["you know", "I mean", "honestly", "really", "actually"],
        'creative_uncertainty': ["perhaps", "maybe", "possibly", "somehow", "in a way"],
        'typos': [("rhythm", "rythm"), ("beautiful", "beautifull"), ("necessary", "neccessary"), ("embarrass", "embarass")]
    },
    'tech': {
        'personal_experience': ["In my work,", "From my experience,", "I've found that", "Working with this,", "Personally,"],
        'tech_casual': ["basically", "pretty much", "essentially", "more or less", "kind of"],
        'uncertainty_tech': ["probably", "likely", "seems to", "appears to", "I think"],
        'informal_connectors': ["Also,", "Plus,", "But here's the thing,", "Now,", "So basically,"],
        'typos': [("implementation", "implimentation"), ("algorithm", "algorythm"), ("configuration", "confuguration")]
    }
}

AI_MIMICRY_PATTERNS = {
    'education': {
        'academic_starters': ["Research indicates", "Studies demonstrate", "Analysis reveals", "Evidence suggests", "Data shows"],
        'formal_language': ["demonstrates", "indicates", "reveals", "establishes", "manifests"],
        'objective_replacements': [("I think", "Research suggests"), ("In my view", "Analysis indicates"), ("I believe", "Evidence demonstrates")],
        'remove_personal': [("personally", "objectively"), ("I feel", "data indicates"), ("my experience", "research shows")]
    },
    'creative': {
        'analytical_starters': ["Examination reveals", "Analysis demonstrates", "Observation indicates", "Assessment shows"],
        'formal_descriptors': ["exhibits", "demonstrates", "manifests", "displays", "presents"],
        'objective_replacements': [("I find", "Analysis reveals"), ("strikes me", "demonstrates"), ("I've felt", "evidence suggests")],
        'remove_emotional': [("beautifully", "effectively"), ("powerfully", "significantly"), ("deeply", "substantially")]
    },
    'tech': {
        'technical_starters': ["Implementation demonstrates", "System analysis reveals", "Technical evaluation shows", "Performance data indicates"],
        'precise_language': ["optimal performance", "systematic approach", "efficient methodology", "comprehensive solution"],
        'objective_replacements': [("I've found", "Testing demonstrates"), ("In my work", "Implementation shows"), ("personally", "technically")],
        'remove_casual': [("basically", "fundamentally"), ("pretty much", "approximately"), ("kind of", "somewhat")]
    }
}

# ===== GENRE-SPECIFIC ATTACK MAPPING =====
GENRE_ATTACK_MAPPING = {
    'education': {
        'ai_to_human': ["PersonalAcademic", "ConversationalAcademic", "SubtleImperfection"],
        'human_to_ai': ["FormalAcademic", "ObjectiveRestructure", "PrecisionRewrite"]
    },
    'creative': {
        'ai_to_human': ["PersonalExpression", "EmotionalStyle", "CreativeImperfection"],
        'human_to_ai': ["AnalyticalStyle", "ObjectiveCreative", "FormalCreative"]
    },
    'tech': {
        'ai_to_human': ["TechExperience", "CasualTech", "TechImperfection"],
        'human_to_ai': ["TechnicalPrecision", "SystematicApproach", "FormalTech"]
    }
}


In [4]:
# ===== REALISTIC TEXT TRANSFORMATION FUNCTIONS =====

def add_realistic_human_touches(text: str, genre: str, intensity: float = 0.7) -> str:
    """Add realistic human touches without over-formatting"""
    if not text or len(text.strip()) == 0:
        return text
        
    genre_key = genre.lower()
    if genre_key not in HUMANIZATION_PATTERNS:
        genre_key = 'education'
    
    patterns = HUMANIZATION_PATTERNS[genre_key]
    
    try:
        sentences = sent_tokenize(text)
    except:
        sentences = text.split('.')
        
    if not sentences:
        return text
        
    modified_sentences = []
    
    for i, sentence in enumerate(sentences):
        if not sentence.strip():
            continue
            
        current_sentence = sentence.strip()
        
        # Add personal touches more naturally (20% chance for subtle, 40% for high)
        touch_chance = 0.2 if intensity < 0.8 else 0.4
        if random.random() < touch_chance * intensity:
            if genre_key == 'education' and 'personal_academic' in patterns:
                touch = random.choice(patterns['personal_academic'])
                current_sentence = f"{touch} {current_sentence.lower()}"
            elif genre_key == 'creative' and 'personal_expression' in patterns:
                touch = random.choice(patterns['personal_expression'])
                current_sentence = f"{touch} {current_sentence.lower()}"
            elif genre_key == 'tech' and 'personal_experience' in patterns:
                touch = random.choice(patterns['personal_experience'])
                current_sentence = f"{touch} {current_sentence.lower()}"
        
        # Add conversational elements naturally (15% chance)
        elif random.random() < 0.15 * intensity:
            if genre_key == 'education' and 'conversational_academic' in patterns:
                conv = random.choice(patterns['conversational_academic'])
                current_sentence = current_sentence.replace(',', f', {conv},', 1)
            elif genre_key == 'creative' and 'conversational_flow' in patterns:
                conv = random.choice(patterns['conversational_flow'])
                words = current_sentence.split()
                if len(words) > 4:
                    pos = random.randint(1, min(3, len(words)-1))
                    words.insert(pos, f"{conv},")
                    current_sentence = ' '.join(words)
            elif genre_key == 'tech' and 'informal_connectors' in patterns:
                conv = random.choice(patterns['informal_connectors'])
                current_sentence = f"{conv} {current_sentence.lower()}"
        
        # Add appropriate uncertainty/hedging (10% chance)
        elif random.random() < 0.1 * intensity:
            if genre_key == 'education' and 'uncertainty_academic' in patterns:
                hedge = random.choice(patterns['uncertainty_academic'])
                current_sentence = current_sentence.replace(' is ', f' {hedge} ', 1)
            elif genre_key == 'creative' and 'creative_uncertainty' in patterns:
                hedge = random.choice(patterns['creative_uncertainty'])
                current_sentence = current_sentence.replace(' is ', f' is {hedge} ', 1)
            elif genre_key == 'tech' and 'uncertainty_tech' in patterns:
                hedge = random.choice(patterns['uncertainty_tech'])
                current_sentence = current_sentence.replace(' will ', f' {hedge} will ', 1)
        
        modified_sentences.append(current_sentence)
    
    if not modified_sentences:
        return text
        
    # Join as single paragraph - NO bullet points or formatting
    result = '. '.join(modified_sentences)
    if not result.endswith('.'):
        result += '.'
    
    # Add realistic typos sparingly (3% chance for subtle, 8% for high)
    typo_chance = 0.03 if intensity < 0.8 else 0.08
    if 'typos' in patterns and random.random() < typo_chance:
        typo_pair = random.choice(patterns['typos'])
        correct, typo = typo_pair
        if correct.lower() in result.lower() and random.random() < 0.4:
            result = re.sub(re.escape(correct), typo, result, count=1, flags=re.IGNORECASE)
    
    return result

def make_realistically_formal(text: str, genre: str, intensity: float = 0.8) -> str:
    """Make text more formal without over-doing it"""
    genre_key = genre.lower()
    if genre_key not in AI_MIMICRY_PATTERNS:
        genre_key = 'education'
    
    patterns = AI_MIMICRY_PATTERNS[genre_key]
    
    try:
        sentences = sent_tokenize(text)
    except:
        sentences = text.split('.')
    
    modified_sentences = []
    
    for i, sentence in enumerate(sentences):
        if not sentence.strip():
            continue
            
        current_sentence = sentence.strip()
        
        # Add formal starters to some sentences (25% chance for subtle, 35% for high)
        starter_chance = 0.25 if intensity < 0.8 else 0.35
        if (random.random() < starter_chance and len(sentence.split()) > 8 and 
            i == 0 and genre_key in patterns):  # Only first sentence for naturalness
            
            if genre_key == 'education' and 'academic_starters' in patterns:
                starter = random.choice(patterns['academic_starters'])
                current_sentence = f"{starter} that {current_sentence.lower()}"
            elif genre_key == 'creative' and 'analytical_starters' in patterns:
                starter = random.choice(patterns['analytical_starters'])
                current_sentence = f"{starter} that {current_sentence.lower()}"
            elif genre_key == 'tech' and 'technical_starters' in patterns:
                starter = random.choice(patterns['technical_starters'])
                current_sentence = f"{starter} that {current_sentence.lower()}"
        
        modified_sentences.append(current_sentence)
    
    # Join as single paragraph
    result = '. '.join(modified_sentences)
    if not result.endswith('.'):
        result += '.'
    
    # Apply objective replacements gradually
    if 'objective_replacements' in patterns:
        replacement_chance = 0.3 if intensity < 0.8 else 0.5
        for old, new in patterns['objective_replacements']:
            if old.lower() in result.lower() and random.random() < replacement_chance:
                result = re.sub(re.escape(old), new, result, count=1, flags=re.IGNORECASE)
    
    # Remove personal/casual language selectively
    if 'remove_personal' in patterns:
        for old, new in patterns['remove_personal']:
            if old.lower() in result.lower() and random.random() < 0.4:
                result = re.sub(re.escape(old), new, result, count=1, flags=re.IGNORECASE)
    
    return result

def strategic_paraphrasing_realistic(text: str, target_type: str, genre: str, intensity: float) -> str:
    """More realistic paraphrasing that maintains natural flow"""
    if groq_client is None:
        return text
    
    try:
        # More nuanced prompts based on intensity
        if target_type == "humanize":
            if intensity < 0.8:  # Subtle
                prompts = {
                    'education': "Rewrite this academic text to sound like a student or researcher sharing their understanding. Add subtle personal touches like 'from what I understand' but keep it academic. Make it sound natural but scholarly.",
                    'creative': "Rewrite this to sound like someone personally reflecting on the topic. Add gentle personal observations and natural language, but don't overdo it.",
                    'tech': "Rewrite this technical content to sound like a developer sharing insights. Add casual phrases like 'I've noticed' but keep the technical accuracy."
                }
            else:  # High intensity but realistic
                prompts = {
                    'education': "Rewrite this to sound like a knowledgeable person explaining the concept conversationally. Add personal insights and natural hesitations, making it sound like spoken academic discourse.",
                    'creative': "Rewrite this with personal voice and emotional engagement, like someone passionate about the topic sharing their thoughts naturally.",
                    'tech': "Rewrite this as if an experienced developer is explaining it conversationally, with personal examples and informal language, but maintaining technical accuracy."
                }
        else:  # ai_like
            if intensity < 0.8:  # Subtle formalization
                prompts = {
                    'education': "Make this text slightly more academic and formal. Use more precise language but don't remove all personality. Keep it as one flowing paragraph.",
                    'creative': "Make this analysis more objective and formal, but not robotic. Use analytical language while maintaining readability as a single paragraph.",
                    'tech': "Make this more technical and precise. Use formal terminology but keep it readable. Present as one cohesive paragraph."
                }
            else:  # High intensity but not robotic
                prompts = {
                    'education': "Rewrite this as formal academic prose. Use objective language and scholarly terminology, but make it read naturally as continuous text.",
                    'creative': "Transform this into analytical, objective commentary. Remove personal elements and use formal analytical language, but keep natural paragraph flow.",
                    'tech': "Rewrite this as formal technical documentation. Use precise, professional language and objective tone, maintaining paragraph structure."
                }
        
        prompt = prompts.get(genre.lower(), prompts['education'])
        
        response = groq_client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": f"{prompt} IMPORTANT: Keep the output as ONE SINGLE PARAGRAPH with no bullet points, no numbered lists, no section headers, and no special formatting."},
                {"role": "user", "content": text}
            ],
            max_tokens=min(1000, len(text.split()) * 2),
            temperature=0.5 if intensity < 0.8 else 0.7
        )
        
        result = response.choices[0].message.content.strip()
        
        # Ensure it's a single paragraph - remove any formatting artifacts
        result = re.sub(r'\n+', ' ', result)  # Remove line breaks
        result = re.sub(r'\*+.*?\*+', '', result)  # Remove markdown bold
        result = re.sub(r'#+\s*', '', result)  # Remove headers
        result = re.sub(r'^\d+\.\s*', '', result)  # Remove numbered lists
        result = re.sub(r'^[-*•]\s*', '', result)  # Remove bullet points
        result = re.sub(r'\s+', ' ', result).strip()  # Clean up spacing
        
        return result
    
    except Exception as e:
        print(f"Paraphrasing failed: {e}")
        return text


In [5]:
def gentle_translation(text: str, intensity: float) -> str:
    """More conservative translation approach"""
    if len(text) > 1200 or intensity < 0.7:  # Skip for long texts or low intensity
        return text
        
    try:
        text_to_translate = text[:800] if len(text) > 800 else text
        
        # Single hop for subtle, double hop for high intensity
        if intensity < 0.8:
            # Single hop: English -> Spanish -> English
            step1 = GoogleTranslator(source='en', target='es').translate(text_to_translate)
            if step1:
                result = GoogleTranslator(source='es', target='en').translate(step1)
                return result if result else text
        else:
            # Double hop: English -> German -> English
            step1 = GoogleTranslator(source='en', target='de').translate(text_to_translate)
            if step1:
                result = GoogleTranslator(source='de', target='en').translate(step1)
                return result if result else text
                
    except Exception as e:
        print(f"Translation failed: {e}")
        return text
    
    return text

In [6]:
# ===== IMPROVED ADVERSARIAL ATTACK STRATEGIES =====

def realistic_adversarial_attack(text: str, source_label: str, genre: str, attack_type: str, intensity: float) -> Tuple[str, str]:
    """Execute realistic adversarial attacks based on genre-specific mapping"""
    
    source_lower = source_label.lower()
    genre_lower = genre.lower()
    
    # Map broad genres to our categories
    if 'education' in genre_lower or 'academic' in genre_lower or 'essay' in genre_lower or 'research' in genre_lower:
        mapped_genre = 'education'
    elif 'creative' in genre_lower or 'poem' in genre_lower or 'story' in genre_lower or 'history' in genre_lower:
        mapped_genre = 'creative'  
    elif 'tech' in genre_lower or 'article' in genre_lower or 'review' in genre_lower:
        mapped_genre = 'tech'
    else:
        mapped_genre = 'education'  # default fallback
    
    if source_lower == 'ai':
        # FALSE NEGATIVE attacks: Make AI text appear human-written
        
        if attack_type == "PersonalAcademic" and mapped_genre == 'education':
            step1 = strategic_paraphrasing_realistic(text, "humanize", mapped_genre, intensity)
            step2 = add_realistic_human_touches(step1, mapped_genre, intensity)
            return step2, "Personal_Academic_Voice"
        
        elif attack_type == "ConversationalAcademic" and mapped_genre == 'education':
            step1 = add_realistic_human_touches(text, mapped_genre, intensity)
            step2 = gentle_translation(step1, intensity) if intensity > 0.7 else step1
            return step2, "Conversational_Academic"
        
        elif attack_type == "SubtleImperfection" and mapped_genre == 'education':
            step1 = add_realistic_human_touches(text, mapped_genre, intensity * 0.8)
            return step1, "Subtle_Academic_Imperfection"
        
        elif attack_type == "PersonalExpression" and mapped_genre == 'creative':
            step1 = strategic_paraphrasing_realistic(text, "humanize", mapped_genre, intensity)
            step2 = add_realistic_human_touches(step1, mapped_genre, intensity)
            return step2, "Personal_Creative_Expression"
        
        elif attack_type == "EmotionalStyle" and mapped_genre == 'creative':
            step1 = add_realistic_human_touches(text, mapped_genre, intensity)
            return step1, "Emotional_Creative_Style"
        
        elif attack_type == "CreativeImperfection" and mapped_genre == 'creative':
            step1 = add_realistic_human_touches(text, mapped_genre, intensity * 0.7)
            return step1, "Creative_Human_Imperfection"
        
        elif attack_type == "TechExperience" and mapped_genre == 'tech':
            step1 = strategic_paraphrasing_realistic(text, "humanize", mapped_genre, intensity)
            step2 = add_realistic_human_touches(step1, mapped_genre, intensity)
            return step2, "Technical_Experience_Voice"
        
        elif attack_type == "CasualTech" and mapped_genre == 'tech':
            step1 = add_realistic_human_touches(text, mapped_genre, intensity)
            return step1, "Casual_Technical_Style"
        
        elif attack_type == "TechImperfection" and mapped_genre == 'tech':
            step1 = add_realistic_human_touches(text, mapped_genre, intensity * 0.8)
            return step1, "Technical_Human_Imperfection"
    
    else:  # source_lower == 'human'
        # FALSE POSITIVE attacks: Make human text appear AI-generated
        
        if attack_type == "FormalAcademic" and mapped_genre == 'education':
            step1 = strategic_paraphrasing_realistic(text, "ai_like", mapped_genre, intensity)
            step2 = make_realistically_formal(step1, mapped_genre, intensity)
            return step2, "Formal_Academic_Style"
        
        elif attack_type == "ObjectiveRestructure" and mapped_genre == 'education':
            step1 = make_realistically_formal(text, mapped_genre, intensity)
            step2 = gentle_translation(step1, intensity) if intensity > 0.7 else step1
            return step2, "Objective_Academic_Restructure"
        
        elif attack_type == "PrecisionRewrite" and mapped_genre == 'education':
            step1 = strategic_paraphrasing_realistic(text, "ai_like", mapped_genre, intensity)
            return step1, "Precision_Academic_Rewrite"
        
        elif attack_type == "AnalyticalStyle" and mapped_genre == 'creative':
            step1 = strategic_paraphrasing_realistic(text, "ai_like", mapped_genre, intensity)
            step2 = make_realistically_formal(step1, mapped_genre, intensity)
            return step2, "Analytical_Creative_Style"
        
        elif attack_type == "ObjectiveCreative" and mapped_genre == 'creative':
            step1 = make_realistically_formal(text, mapped_genre, intensity)
            return step1, "Objective_Creative_Analysis"
        
        elif attack_type == "FormalCreative" and mapped_genre == 'creative':
            step1 = strategic_paraphrasing_realistic(text, "ai_like", mapped_genre, intensity)
            return step1, "Formal_Creative_Analysis"
        
        elif attack_type == "TechnicalPrecision" and mapped_genre == 'tech':
            step1 = strategic_paraphrasing_realistic(text, "ai_like", mapped_genre, intensity)
            step2 = make_realistically_formal(step1, mapped_genre, intensity)
            return step2, "Technical_Precision_Enhancement"
        
        elif attack_type == "SystematicApproach" and mapped_genre == 'tech':
            step1 = make_realistically_formal(text, mapped_genre, intensity)
            return step1, "Systematic_Technical_Approach"
        
        elif attack_type == "FormalTech" and mapped_genre == 'tech':
            step1 = strategic_paraphrasing_realistic(text, "ai_like", mapped_genre, intensity)
            return step1, "Formal_Technical_Style"
    
    return text, "No_Attack_Applied"

In [7]:
# ===== MAIN PROCESSING PIPELINE =====

def process_adversarial_dataset(input_file: str, output_file: str = None) -> pd.DataFrame:
    """
    Main pipeline for generating realistic adversarial samples
    """
    
    # Load dataset
    try:
        if input_file.endswith('.xlsx'):
            df = pd.read_excel(input_file)
        else:
            df = pd.read_csv(input_file)
        print(f"Loaded dataset: {len(df)} samples")
    except FileNotFoundError:
        print(f"Error: Could not find {input_file}")
        return None
    except Exception as e:
        print(f"Error loading file: {e}")
        return None
    
    # Validate required columns
    required_columns = ['original_text', 'source', 'genre']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Error: Missing required columns: {missing_columns}")
        print(f"Available columns: {list(df.columns)}")
        return None
    
    # Clean and validate data
    df = df.dropna(subset=['original_text', 'source', 'genre'])
    df['source'] = df['source'].str.lower().str.strip()
    df['genre'] = df['genre'].str.lower().str.strip()
    
    # Filter valid sources
    valid_sources = df['source'].isin(['ai', 'human'])
    if not valid_sources.all():
        invalid_count = (~valid_sources).sum()
        print(f"Warning: {invalid_count} samples have invalid source labels. Filtering them out.")
        df = df[valid_sources].reset_index(drop=True)
    
    print(f"Final dataset: {len(df)} samples")
    
    # Initialize result columns
    result_columns = {
        'adversarial_text': "",
        'attack_type': "",
        'attack_technique': "",
        'attack_target': "",
        'original_length': 0,
        'adversarial_length': 0,
        'length_change_pct': 0.0,
        'modification_intensity': "",
        'expected_impact': ""
    }
    
    for col, default_val in result_columns.items():
        df[col] = default_val
    
    # Process each sample
    print("Processing samples...")
    successful_attacks = 0
    
    for idx in df.index:
        try:
            original_text = str(df.at[idx, 'original_text'])
            source = str(df.at[idx, 'source']).strip().lower()
            genre = str(df.at[idx, 'genre']).strip().lower()
            
            # Skip if text is too short or too long
            if len(original_text.split()) < 15:
                df.at[idx, 'adversarial_text'] = original_text
                df.at[idx, 'attack_type'] = "Text_Too_Short"
                continue
            elif len(original_text.split()) > 500:
                df.at[idx, 'adversarial_text'] = original_text
                df.at[idx, 'attack_type'] = "Text_Too_Long"  
                continue
            
            # Map genre to our categories
            if 'education' in genre or 'academic' in genre or 'essay' in genre or 'research' in genre:
                mapped_genre = 'education'
            elif 'creative' in genre or 'poem' in genre or 'story' in genre or 'history' in genre:
                mapped_genre = 'creative'  
            elif 'tech' in genre or 'article' in genre or 'review' in genre:
                mapped_genre = 'tech'
            else:
                mapped_genre = 'education'  # default
            
            # Select genre-appropriate attack strategy
            if source == 'ai':
                available_attacks = GENRE_ATTACK_MAPPING[mapped_genre]['ai_to_human']
                target = "False_Negative"
            else:  # source == 'human'
                available_attacks = GENRE_ATTACK_MAPPING[mapped_genre]['human_to_ai']
                target = "False_Positive"
            
            attack_type = random.choice(available_attacks)
            
            # Determine intensity based on text characteristics
            word_count = len(original_text.split())
            if word_count > 100:
                intensity = random.choice([0.6, 0.7, 0.8])  # More conservative for longer texts
            else:
                intensity = random.choice([0.7, 0.8, 0.9])  # Can be more aggressive for shorter texts
            
            # Execute realistic adversarial attack
            adversarial_text, technique_used = realistic_adversarial_attack(
                original_text, source, mapped_genre, attack_type, intensity
            )
            
            # Calculate metrics
            orig_length = len(original_text.split())
            adv_length = len(adversarial_text.split())
            length_change_pct = ((adv_length - orig_length) / orig_length) * 100
            
            # Determine modification intensity based on both length change and actual intensity
            if intensity >= 0.8 and abs(length_change_pct) > 15:
                intensity_label = "High"
            elif intensity >= 0.7 or abs(length_change_pct) > 8:
                intensity_label = "Moderate"
            else:
                intensity_label = "Subtle"
            
            # Expected impact assessment
            if intensity_label == "High" and technique_used != "No_Attack_Applied":
                impact = "Strong_Realistic_Challenge"
            elif intensity_label == "Moderate":
                impact = "Moderate_Natural_Challenge"
            else:
                impact = "Subtle_Pattern_Shift"
            
            # Store results
            df.at[idx, 'adversarial_text'] = adversarial_text
            df.at[idx, 'attack_type'] = attack_type
            df.at[idx, 'attack_technique'] = technique_used
            df.at[idx, 'attack_target'] = target
            df.at[idx, 'original_length'] = orig_length
            df.at[idx, 'adversarial_length'] = adv_length
            df.at[idx, 'length_change_pct'] = round(length_change_pct, 2)
            df.at[idx, 'modification_intensity'] = intensity_label
            df.at[idx, 'expected_impact'] = impact
            
            if technique_used != "No_Attack_Applied":
                successful_attacks += 1
            
        except Exception as e:
            print(f"Error processing sample {idx}: {e}")
            df.at[idx, 'adversarial_text'] = df.at[idx, 'original_text']
            df.at[idx, 'attack_type'] = "Processing_Error"
            df.at[idx, 'attack_technique'] = f"Error: {str(e)[:50]}"
    
    # Save results
    if output_file is None:
        base_name = input_file.rsplit('.', 1)[0]
        output_file = f"{base_name}_realistic_adversarial_results.xlsx"
    
    try:
        df.to_excel(output_file, index=False)
        print(f"Results saved to: {output_file}")
    except Exception as e:
        print(f"Error saving file: {e}")
        return df
    
    # Generate summary statistics
    print("\n=== REALISTIC ADVERSARIAL PIPELINE SUMMARY ===")
    print(f"Total samples processed: {len(df)}")
    print(f"Successful attacks: {successful_attacks}")
    print(f"Success rate: {(successful_attacks/len(df)*100):.1f}%")
    
    # Attack distribution by genre
    print(f"\nGenre-specific attack distribution:")
    genre_attack = df.groupby(['genre', 'attack_type']).size().reset_index(name='count')
    for genre in df['genre'].unique():
        genre_data = genre_attack[genre_attack['genre'] == genre]
        print(f"\n  {genre.upper()}:")
        for _, row in genre_data.iterrows():
            print(f"    {row['attack_type']}: {row['count']}")
    
    # Target distribution
    print(f"\nAttack target distribution:")
    target_counts = df['attack_target'].value_counts()
    for target, count in target_counts.items():
        print(f"  {target}: {count}")
    
    # Intensity distribution
    print(f"\nModification intensity distribution:")
    intensity_counts = df['modification_intensity'].value_counts()
    for intensity, count in intensity_counts.items():
        print(f"  {intensity}: {count}")
    
    # Sample some results for quality check
    print(f"\n=== SAMPLE TRANSFORMATIONS ===")
    sample_indices = df.sample(min(3, len(df))).index
    for idx in sample_indices:
        print(f"\nSample {idx} ({df.at[idx, 'genre']} - {df.at[idx, 'source']} -> {df.at[idx, 'attack_type']}):")
        print(f"Original: {df.at[idx, 'original_text'][:100]}...")
        print(f"Adversarial: {df.at[idx, 'adversarial_text'][:100]}...")
        print(f"Technique: {df.at[idx, 'attack_technique']}")
    
    print(f"\n=== READY FOR REALISTIC AI DETECTOR TESTING ===")
    print("✓ Genre-appropriate attacks applied")
    print("✓ Single paragraph format maintained")
    print("✓ Natural, realistic transformations")
    print("✓ Mimics real-world editing scenarios")
    
    return df

In [8]:
# ===== MAIN EXECUTION FUNCTION =====

def run_realistic_adversarial_pipeline(input_file: str):
    """
    Execute the improved adversarial pipeline for realistic AI detection testing
    
    Args:
        input_file: Path to Excel/CSV file with columns: original_text, source, genre
    
    Returns:
        DataFrame with original and realistically modified adversarial samples
    """
    
    print("=== REALISTIC ADVERSARIAL AI DETECTION PIPELINE ===")
    print("Purpose: Generate realistic adversarial samples mimicking real-world scenarios")
    print("Features:")
    print("• Genre-specific attack strategies")
    print("• Single paragraph format preservation")
    print("• Natural editing patterns")
    print("• Realistic intensity levels")
    print(f"Input file: {input_file}")
    
    # Process the dataset
    result_df = process_adversarial_dataset(input_file)
    
    if result_df is not None:
        print("\n✓ Realistic pipeline completed successfully")
        print("✓ Natural transformations applied")
        print("✓ Ready for detector robustness evaluation")
        print("✓ Dataset prepared for real-world scenario testing")
        return result_df
    else:
        print("\n✗ Pipeline failed - please check input file and requirements")
        return None


In [10]:
# ===== USAGE EXAMPLE =====
if __name__ == "__main__":
    # Example usage - replace with your actual file
    input_filename = "Sample.xlsx"  # Your dataset file
    results = run_realistic_adversarial_pipeline(input_filename)
    
    # Optional: Check specific transformations
    if results is not None:
        print("\n=== QUALITY CHECK ===")
        # Show high-intensity transformations
        high_intensity = results[results['modification_intensity'] == 'High']
        if len(high_intensity) > 0:
            print(f"\nHigh-intensity sample:")
            idx = high_intensity.index[0]
            print(f"Genre: {results.at[idx, 'genre']}")
            print(f"Source: {results.at[idx, 'source']}")
            print(f"Attack: {results.at[idx, 'attack_type']}")
            print(f"Original: {results.at[idx, 'original_text']}")
            print(f"Modified: {results.at[idx, 'adversarial_text']}")
            print(f"Change: {results.at[idx, 'length_change_pct']}%")
        
        # Check for any formatting issues
        formatting_issues = 0
        for idx in results.index:
            adv_text = str(results.at[idx, 'adversarial_text'])
            if ('*' in adv_text or '#' in adv_text or 
                adv_text.count('\n') > 2 or '1.' in adv_text or '•' in adv_text):
                formatting_issues += 1
        
        print(f"\nFormatting quality check: {len(results) - formatting_issues}/{len(results)} samples properly formatted")
        if formatting_issues > 0:
            print(f"⚠️  {formatting_issues} samples may have formatting artifacts")
        else:
            print("✓ All samples maintain single paragraph format")

=== REALISTIC ADVERSARIAL AI DETECTION PIPELINE ===
Purpose: Generate realistic adversarial samples mimicking real-world scenarios
Features:
• Genre-specific attack strategies
• Single paragraph format preservation
• Natural editing patterns
• Realistic intensity levels
Input file: Sample.xlsx
Loaded dataset: 300 samples
Final dataset: 300 samples
Processing samples...
Results saved to: Sample_realistic_adversarial_results.xlsx

=== REALISTIC ADVERSARIAL PIPELINE SUMMARY ===
Total samples processed: 300
Successful attacks: 300
Success rate: 100.0%

Genre-specific attack distribution:

  TECH:
    CasualTech: 13
    FormalTech: 12
    SystematicApproach: 14
    TechExperience: 21
    TechImperfection: 16
    TechnicalPrecision: 24

  EDUCATION:
    ConversationalAcademic: 12
    FormalAcademic: 18
    ObjectiveRestructure: 20
    PersonalAcademic: 28
    PrecisionRewrite: 12
    SubtleImperfection: 10

  CREATIVES:
    AnalyticalStyle: 20
    CreativeImperfection: 23
    EmotionalStyle: