# Enhanced TTT Strategy - GPU Validation

This notebook validates the Enhanced TTT (test-time training) Strategy implementation on a Kaggle GPU.

**Target Metrics:**
- Accuracy: 58%+ on 100 evaluation tasks
- Inference Time: <5 minutes per task
- Performance Speedup: 30-40% from optimizations

**Runtime: ~6-9 hours total**

---

## Cell 1: Environment Check

In [None]:
import sys
import torch
import os
from pathlib import Path

# Banner for the environment report.
banner = "=" * 60
print(banner)
print("KAGGLE ENVIRONMENT CHECK")
print(banner)

# Query CUDA state once; both values are echoed and reused below.
cuda_available = torch.cuda.is_available()
gpu_count = torch.cuda.device_count()
print(f"CUDA Available: {cuda_available}")
print(f"GPU Count: {gpu_count}")

if not cuda_available:
    print("\n⚠️ WARNING: No GPU detected! Enable GPU in notebook settings.")
    print("Settings → Accelerator → GPU T4 x2")
else:
    # One line per visible device: name plus total memory (bytes / 1024**3).
    for device_index in range(gpu_count):
        device_name = torch.cuda.get_device_name(device_index)
        total_bytes = torch.cuda.get_device_properties(device_index).total_memory
        print(f"GPU {device_index}: {device_name} ({total_bytes / 1024**3:.1f} GB)")

# Report whether the standard Kaggle directories are present.
has_working_dir = os.path.exists('/kaggle/working')
has_input_dir = os.path.exists('/kaggle/input')
python_version = sys.version.split()[0]
print(f"\nKaggle Working Dir: {has_working_dir}")
print(f"Kaggle Input Dir: {has_input_dir}")
print(f"Python Version: {python_version}")
print(banner)

## Cell 2: Clone Repository & Setup

In [None]:
# Enhanced repository setup with multiple fallback options
import os
import subprocess
import sys
from pathlib import Path
import time

# Section banner for the repository setup cell.
setup_rule = "=" * 60
print(setup_rule)
print("REPOSITORY SETUP WITH FALLBACK OPTIONS")
print(setup_rule)

# Where the code comes from and where it is checked out.
REPO_URL = "https://github.com/18h32n/shockabo.git"
TARGET_DIR = "/kaggle/working/arc-prize-2025"

def run_command(cmd, description, timeout=300):
    """Execute a shell command and report the outcome on stdout.

    Args:
        cmd: Command line, passed to the shell as a single string.
        description: Human-readable label used in every progress message.
        timeout: Seconds to wait before giving up on the command.

    Returns:
        True when the command exits with status 0; False on a non-zero
        status, a timeout, or any other exception (all are printed, none
        are re-raised).
    """
    print(f"\n{description}...")
    try:
        completed = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout,
            encoding='utf-8',
            errors='replace',
        )
        # Failure path first: echo the exit code and any stderr text.
        if completed.returncode != 0:
            print(f"✗ {description} failed (code: {completed.returncode})")
            stderr_text = completed.stderr.strip()
            if stderr_text:
                print(f"Error: {stderr_text}")
            return False

        print(f"✓ {description} successful")
        stdout_text = completed.stdout.strip()
        if stdout_text:
            print(f"Output: {stdout_text}")
        return True
    except subprocess.TimeoutExpired:
        print(f"✗ {description} timed out after {timeout}s")
        return False
    except Exception as exc:
        print(f"✗ {description} exception: {exc}")
        return False

# Option 1: Try fresh clone
print("\n" + "="*60)
print("OPTION 1: Fresh Repository Clone")
print("="*60)

# File whose presence is taken as proof that a clone produced the sources.
SENTINEL_FILE = f"{TARGET_DIR}/src/utils/ttt_methodology.py"

# Work from the parent of TARGET_DIR. When this cell is re-run, the current
# directory is TARGET_DIR itself (a later step chdirs into it); deleting it
# would leave git without a valid working directory and make the clone fail.
parent_dir = os.path.dirname(TARGET_DIR)
try:
    os.makedirs(parent_dir, exist_ok=True)
    os.chdir(parent_dir)
except OSError as e:
    print(f"⚠️ Could not switch to {parent_dir}: {e}")

# Remove existing directory if it exists
if os.path.exists(TARGET_DIR):
    print(f"Removing existing directory: {TARGET_DIR}")
    run_command(f"rm -rf {TARGET_DIR}", "Directory cleanup")

# Try cloning
clone_success = run_command(
    f"git clone {REPO_URL} {TARGET_DIR}",
    "Git clone",
    timeout=600
)

if clone_success:
    # Verify clone worked
    if os.path.exists(SENTINEL_FILE):
        print("✓ Repository clone verified - source files found")
    else:
        print("✗ Repository clone incomplete - missing source files")
        clone_success = False

# Option 2: Try alternative clone method
if not clone_success:
    print("\n" + "="*60)
    print("OPTION 2: Alternative Clone Method")
    print("="*60)

    # git refuses to clone into a non-empty directory, so clear whatever the
    # first attempt left behind instead of pre-creating the target.
    if os.path.exists(TARGET_DIR):
        run_command(f"rm -rf {TARGET_DIR}", "Remove incomplete clone")

    # NOTE(review): this pins the "master" branch while Option 1 clones the
    # default branch — confirm that "master" exists in the repository.
    alt_success = run_command(
        f"git clone --depth 1 --branch master {REPO_URL} {TARGET_DIR}",
        "Shallow clone attempt"
    )

    # Apply the same verification as Option 1 so a partial shallow clone is
    # not reported as a success.
    if alt_success and not os.path.exists(SENTINEL_FILE):
        print("✗ Shallow clone incomplete - missing source files")
        alt_success = False
    clone_success = alt_success

# Option 3: Manual verification and setup
if not clone_success:
    print("\n" + "="*60)
    print("OPTION 3: Manual Fallback Setup")
    print("="*60)
    print("❌ Repository clone failed. Creating minimal structure...")

    # Create basic directory structure
    os.makedirs(f"{TARGET_DIR}/src/utils", exist_ok=True)
    os.makedirs(f"{TARGET_DIR}/tests/integration", exist_ok=True)
    os.makedirs(f"{TARGET_DIR}/scripts/platform_deploy", exist_ok=True)

    print("⚠️  CRITICAL: Repository access failed!")
    print("   This will prevent the enhanced TTT validation from running properly.")
    print("   Please check repository access or upload code as Kaggle dataset.")
# Enter the project directory whether or not the clone succeeded; a failure
# is reported but does not stop the cell.
try:
    os.chdir(TARGET_DIR)
    print(f"\n✓ Changed to directory: {os.getcwd()}")
except Exception as chdir_error:
    print(f"✗ Failed to change directory: {chdir_error}")

# Show what ended up in the working directory.
section_rule = "="*60
print("\n" + section_rule)
print("VERIFICATION AND SETUP")
print(section_rule)

print("Current directory contents:")
try:
    for entry_name in sorted(os.listdir(".")):
        # Directories get a folder icon and a trailing slash, files a page icon.
        if Path(entry_name).is_dir():
            listing_line = f"  📁 {entry_name}/"
        else:
            listing_line = f"  📄 {entry_name}"
        print(listing_line)
except Exception as listing_error:
    print(f"Error listing directory: {listing_error}")

# Prefer the repository's own Kaggle setup script; fall back to setting the
# CUDA-related environment variables by hand when it is absent.
setup_script = Path("scripts/platform_deploy/kaggle_setup.py")
if not setup_script.exists():
    print(f"\n⚠️  Setup script not found: {setup_script}")
    print("Creating minimal environment setup...")

    # Applied to this process's environment (os.environ).
    os.environ.update({
        'CUDA_LAUNCH_BLOCKING': '1',
        'TORCH_USE_CUDA_DSA': '1',
        'CUDA_VISIBLE_DEVICES': '0',
    })
    print("✓ Set critical CUDA environment variables")
else:
    print(f"\n✓ Setup script found: {setup_script}")
    run_command(
        "python scripts/platform_deploy/kaggle_setup.py",
        "Kaggle platform setup",
        timeout=600
    )

closing_rule = "="*60
print("\n" + closing_rule)
print("SETUP COMPLETE")
print(closing_rule)

## Cell 3: Install Additional Dependencies

In [None]:
# Install test dependencies
!pip install -q pytest pytest-asyncio pytest-timeout pytest-mock

# Install transformers and related packages
!pip install -q transformers>=4.35.0 accelerate>=0.25.0 bitsandbytes>=0.41.0 peft>=0.7.0

# Verify installations
import pytest
import transformers
import accelerate
import peft

print(f"pytest: {pytest.__version__}")
print(f"transformers: {transformers.__version__}")
print(f"accelerate: {accelerate.__version__}")
print(f"peft: {peft.__version__}")
print("\n✓ All dependencies installed successfully!")

## Cell 4: Verify ARC Dataset

In [None]:
# Check for ARC dataset in Kaggle input
import json
from pathlib import Path

# Common locations for ARC dataset in Kaggle, checked in order
dataset_locations = [
    "/kaggle/input/arc-prize-2025",
    "/kaggle/input/abstraction-and-reasoning-corpus",
    "/kaggle/input/arc-agi",
    "arc-prize-2025/data/downloaded"  # Local copy
]

# File names of the evaluation split that the validation needs.
CHALLENGES_NAME = "arc-agi_evaluation_challenges.json"
SOLUTIONS_NAME = "arc-agi_evaluation_solutions.json"

# Pick the first location that exists AND contains both files. A directory
# that exists but lacks the files no longer ends the search.
dataset_path = None
eval_files = []
solution_files = []
for loc in dataset_locations:
    candidate = Path(loc)
    if not candidate.exists():
        continue
    print(f"Found dataset at: {loc}")
    dataset_path = candidate
    # Sorted so that the [0] pick below is deterministic across runs.
    eval_files = sorted(candidate.glob(f"**/{CHALLENGES_NAME}"))
    solution_files = sorted(candidate.glob(f"**/{SOLUTIONS_NAME}"))
    if eval_files and solution_files:
        break
    print("  Evaluation files missing here - checking next location")

if dataset_path:
    if eval_files and solution_files:
        eval_path = eval_files[0]
        solution_path = solution_files[0]

        # Load and verify (json.load raises if either file is not valid JSON)
        with open(eval_path, encoding="utf-8") as f:
            challenges = json.load(f)
        with open(solution_path, encoding="utf-8") as f:
            solutions = json.load(f)

        print("\n✓ Dataset verified!")
        print(f"  Challenges: {len(challenges)} tasks")
        print(f"  Solutions: {len(solutions)} tasks")
        print(f"  Evaluation path: {eval_path}")
        print(f"  Solutions path: {solution_path}")

        # Copy to expected location.
        # NOTE(review): this path is relative to the current working
        # directory — confirm it matches where the tests look for the data.
        expected_dir = Path("arc-prize-2025/data/downloaded")
        expected_dir.mkdir(parents=True, exist_ok=True)

        import shutil
        for source_file, file_name in (
            (eval_path, CHALLENGES_NAME),
            (solution_path, SOLUTIONS_NAME),
        ):
            destination = expected_dir / file_name
            # When the dataset was found at the local-copy location, source
            # and destination are the same file and shutil.copy would raise
            # SameFileError, so skip the copy in that case.
            if destination.exists() and destination.samefile(source_file):
                continue
            shutil.copy(source_file, destination)
        print(f"\n✓ Dataset copied to: {expected_dir}")
    else:
        print(f"\n⚠️ Dataset files not found in {dataset_path}")
else:
    print("\n⚠️ ARC dataset not found!")
    print("Please add ARC dataset to your notebook:")
    print("  1. Click '+ Add Data' button")
    print("  2. Search for 'ARC Prize 2025' or 'abstraction reasoning'")
    print("  3. Click 'Add' and re-run this cell")

## Cell 5: Run Enhanced TTT Accuracy Validation

**This will take 4-6 hours to complete**

Tests 100 evaluation tasks with:
- Leave-one-out generation
- Self-consistency validation
- LoRA optimization
- Memory efficient batch processing

In [None]:
# Enhanced TTT accuracy test with comprehensive diagnostics
import subprocess
import sys
import os
from pathlib import Path
import time

# Cell banner.
outer_rule = "=" * 60
print(outer_rule)
print("ENHANCED TTT ACCURACY VALIDATION")
print(outer_rule)
print("This will take 4-6 hours to complete (or fail quickly if issues persist)")

# Pre-flight section banner.
inner_rule = "="*50
print("\n" + inner_rule)
print("PRE-FLIGHT DIAGNOSTICS")
print(inner_rule)

# Where are we running from?
current_dir = Path.cwd()
print(f"Current directory: {current_dir}")

# Is the accuracy test module where pytest will be told to find it?
test_file = Path("tests/integration/test_enhanced_ttt_accuracy.py")
test_file_present = test_file.exists()
print(f"Test file exists: {test_file_present}")

if not test_file_present:
    print(f"✗ Test file NOT found: {test_file}")
    print("Available test files:")
    # List every Python file under tests/ to help spot a renamed or moved test.
    tests_root = Path("tests")
    if not tests_root.exists():
        print("  No tests directory found!")
    else:
        for candidate in tests_root.rglob("*.py"):
            print(f"  - {candidate}")
else:
    print(f"✓ Test file found: {test_file}")
    print(f"  File size: {test_file.stat().st_size} bytes")

# Source modules whose presence is checked before the test is launched.
critical_files = [
    "src/utils/ttt_methodology.py",
    "src/utils/lora_adapter.py", 
    "src/adapters/strategies/ttt_adapter.py"
]

print("\nCritical source files:")
# Record presence per file, print a tick or cross for each, then fold the
# results into a single flag.
file_presence = {source_path: Path(source_path).exists() for source_path in critical_files}
for source_path, is_present in file_presence.items():
    status_mark = '✓' if is_present else '✗'
    print(f"  {status_mark} {source_path}")
all_files_exist = all(file_presence.values())

# Echo the CUDA-related environment variables as this process sees them.
print("\nCUDA Environment Variables:")
cuda_vars = ['CUDA_LAUNCH_BLOCKING', 'TORCH_USE_CUDA_DSA', 'CUDA_VISIBLE_DEVICES']
for var_name in cuda_vars:
    print(f"  {var_name}: {os.environ.get(var_name, 'Not set')}")

# Report what PyTorch sees; any failure here is printed, not raised.
try:
    import torch
    print("\nPyTorch CUDA Status:")
    has_cuda = torch.cuda.is_available()
    print(f"  CUDA available: {has_cuda}")
    if has_cuda:
        print(f"  Device count: {torch.cuda.device_count()}")
        print(f"  Current device: {torch.cuda.current_device()}")
        print(f"  Device name: {torch.cuda.get_device_name(0)}")
except Exception as cuda_error:
    print(f"  Error checking CUDA: {cuda_error}")

# Summarise the pre-flight outcome. This section only prints guidance; it
# does not prevent the test from being launched afterwards.
strategy_rule = "="*50
print("\n" + strategy_rule)
print("EXECUTION STRATEGY")
print(strategy_rule)

if all_files_exist and test_file.exists():
    print("✓ All pre-flight checks passed")
    print("Proceeding with enhanced TTT validation...")

elif not all_files_exist:
    print("❌ CRITICAL: Missing source files - cannot run full validation")
    print("This suggests repository setup failed.")
    print("The test will likely fail immediately.")
    print("\nTo fix this:")
    print("1. Check repository access")
    print("2. Verify GitHub URL is correct") 
    print("3. Consider uploading code as Kaggle dataset")

else:
    # Sources are present but the test module itself is missing.
    print("❌ CRITICAL: Test file missing - cannot run validation")
    print("Even if source files exist, the test runner won't work.")

# Execute the test with enhanced error handling
print("\n" + "="*50)
print("TEST EXECUTION")
print("="*50)

# Limit passed to pytest-timeout via --timeout. The timeout is enforced inside
# the child pytest process; this cell never raises a timeout itself.
TEST_TIMEOUT_SEC = 7 * 3600

start_time = time.time()

try:
    # Run the test with comprehensive logging
    cmd = [
        sys.executable, "-m", "pytest", 
        "tests/integration/test_enhanced_ttt_accuracy.py::test_enhanced_ttt_accuracy",
        "-v", "-s", "--no-header", "--tb=long",  # Long tracebacks for diagnosis
        "--capture=no",  # Don't capture output
        f"--timeout={TEST_TIMEOUT_SEC}",  # 7 hour timeout
    ]

    print(f"Executing command: {' '.join(cmd)}")
    print(f"Working directory: {os.getcwd()}")
    print(f"Python executable: {sys.executable}")

    # Stream the child's merged stdout/stderr line by line. The context
    # manager closes the pipe and reaps the process even if printing fails.
    # Undecodable bytes are replaced rather than aborting the stream, matching
    # how run_command decodes output.
    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        encoding='utf-8',
        errors='replace',
        bufsize=1,
    ) as process:
        for line in process.stdout:
            # Strip only the line terminator so traceback indentation survives.
            print(line.rstrip("\r\n"))
        return_code = process.wait()

    execution_time = time.time() - start_time

    print("\n" + "="*50)
    print("TEST EXECUTION SUMMARY")
    print("="*50)
    print(f"Execution time: {execution_time:.1f} seconds ({execution_time/60:.1f} minutes)")
    print(f"Return code: {return_code}")

    if return_code == 0:
        print("✓ Enhanced TTT Accuracy Test PASSED")
    else:
        print(f"⚠️ Enhanced TTT Accuracy Test returned code: {return_code}")

        # Provide specific guidance based on execution time
        if execution_time < 300:  # Less than 5 minutes
            print("⚠️ QUICK FAILURE DETECTED")
            print("This suggests a fundamental setup or CUDA issue.")
            print("Check the logs above for CUDA errors, setup failures, or missing files.")
        elif execution_time < 3600:  # Less than 1 hour  
            print("⚠️ EARLY TERMINATION")
            print("Test started but terminated early. Check for memory issues or errors.")
        elif execution_time >= TEST_TIMEOUT_SEC:
            # A timeout shows up as a failing pytest run, not as an exception
            # in this cell, so it is reported here.
            print("⚠️ Test ran for the full timeout window")
            print("pytest-timeout most likely aborted the test; it may be stuck or simply too slow.")
        else:
            print("ℹ️ Long execution detected - check logs for completion status.")

except Exception as e:
    # Top-level boundary for the cell: e.g. the interpreter could not be
    # launched or reading the pipe failed.
    execution_time = time.time() - start_time
    print(f"\n✗ Test execution failed after {execution_time:.1f} seconds: {e}")
    print("This indicates a fundamental execution environment issue.")

print("\n" + "="*60)
print("ENHANCED TTT ACCURACY TEST COMPLETE")
print("="*60)

## Cell 6: Analyze Accuracy Results

In [None]:
import json
from pathlib import Path

results_rule = "=" * 60
print(results_rule)
print("ENHANCED TTT ACCURACY RESULTS")
print(results_rule)

report_path = Path("validation_results/enhanced_ttt_report.json")

if report_path.exists():
    # Any problem while loading or formatting the report is printed, not raised.
    try:
        with open(report_path) as f:
            report = json.load(f)

        summary = report.get('summary', {})

        print(f"\nTotal Tasks: {summary.get('total_tasks', 'N/A')}")
        print(f"Correct Tasks: {summary.get('correct_tasks', 'N/A')}")

        # Accuracy is compared against the 58% target as a fraction.
        accuracy = summary.get('accuracy', 0)
        print(f"\nAccuracy: {accuracy:.2%}")
        accuracy_verdict = '✓ PASS' if accuracy >= 0.58 else '✗ FAIL'
        print(f"Target (58%): {accuracy_verdict}")

        print(f"\nAverage Confidence: {summary.get('avg_confidence', 0):.3f}")
        print(f"Average Adaptation Time: {summary.get('avg_adaptation_time_sec', 0):.1f}s")
        print(f"Average Inference Time: {summary.get('avg_inference_time_sec', 0):.1f}s")

        # Per-task time is adaptation plus inference, checked against 300 s.
        total_avg_time = (
            summary.get('avg_adaptation_time_sec', 0)
            + summary.get('avg_inference_time_sec', 0)
        )
        print(f"\nTotal Average Time: {total_avg_time:.1f}s ({total_avg_time/60:.1f} min)")
        time_verdict = '✓ PASS' if total_avg_time < 300 else '✗ FAIL'
        print(f"Time Target (<5 min): {time_verdict}")

        print(f"\nTotal Runtime: {summary.get('total_time_sec', 0)/3600:.2f} hours")

        # Optional per-difficulty section.
        if 'difficulty_breakdown' in report:
            print("\n" + results_rule)
            print("DIFFICULTY BREAKDOWN")
            print(results_rule)
            for level, stats in report['difficulty_breakdown'].items():
                print(f"\n{level.upper()}:")
                print(f"  Tasks: {stats.get('total', 'N/A')}")
                print(f"  Correct: {stats.get('correct', 'N/A')}")
                print(f"  Accuracy: {stats.get('accuracy', 0):.2%}")
                print(f"  Avg Confidence: {stats.get('avg_confidence', 0):.3f}")
                print(f"  Avg Time: {stats.get('avg_time', 0):.1f}s")

        print("\n" + results_rule)

    except Exception as e:
        print(f"✗ Error reading results file: {e}")
        print("File may be corrupted or incomplete")

else:
    print("⚠️ Results file not found!")
    print(f"Expected: {report_path}")

    # Show whatever JSON reports do exist, to help locate a renamed output.
    results_dir = Path("validation_results")
    if not results_dir.exists():
        print("No results directory found")
    else:
        print("\nAvailable result files:")
        for found_report in results_dir.glob("*.json"):
            print(f"  - {found_report}")

    # Hint based on whether a test_output.log file is present.
    if Path("test_output.log").exists():
        run_hint = "\nTest appears to have run, check logs for issues"
    else:
        run_hint = "\nTest may not have completed successfully"
    print(run_hint)

## Cell 7: Run Performance Optimization Benchmarks

**This will take 2-3 hours to complete**

Benchmarks optimization techniques:
- KV-cache optimization
- Static cache
- torch.compile
- Combined speedup measurement

In [None]:
# Run performance optimization test
# Runs a single pytest node through the IPython "!" shell escape; the trailing
# backslashes continue the same shell command across the following lines.
# NOTE(review): the accuracy cell launches pytest via `sys.executable -m pytest`
# while this cell calls the bare `pytest` executable — confirm both resolve to
# the same environment and import path.
!pytest tests/performance/test_ttt_inference_time.py::test_combined_optimizations \
    -v -s --no-header --tb=short \
    --timeout=14400  # 4 hour timeout

# The banner below is printed unconditionally; the command's exit status is
# not inspected here.
print("\n" + "="*60)
print("PERFORMANCE OPTIMIZATION TEST COMPLETE")
print("="*60)

## Cell 8: Analyze Performance Results

In [None]:
import json
from pathlib import Path

perf_report_path = Path("validation_results/ttt_inference_optimization_report.json")

if perf_report_path.exists():
    # A truncated or malformed report (e.g. from an interrupted benchmark) is
    # reported as a message instead of crashing the cell, mirroring how the
    # accuracy analysis cell handles its report.
    try:
        with open(perf_report_path, encoding="utf-8") as f:
            perf_report = json.load(f)

        speedup = perf_report['speedup_summary']

        # Pull every field up front so a missing key fails before any
        # partial output is printed.
        baseline_sec = speedup['avg_baseline_sec']
        optimized_sec = speedup['avg_optimized_sec']
        speedup_factor = speedup['speedup_factor']
        reduction_pct = speedup['reduction_pct']
        baseline_samples = speedup['baseline_samples']
        optimized_samples = speedup['optimized_samples']

        print("=" * 60)
        print("PERFORMANCE OPTIMIZATION RESULTS")
        print("=" * 60)

        print(f"\nBaseline Average Time: {baseline_sec:.1f}s")
        print(f"Optimized Average Time: {optimized_sec:.1f}s")

        print(f"\nSpeedup Factor: {speedup_factor:.2f}x")
        print(f"Time Reduction: {reduction_pct:.1f}%")

        # Targets: at least 1.30x speedup and under 300 s optimized time.
        print(f"\nTarget Speedup (1.30x): {'✓ PASS' if speedup_factor >= 1.30 else '✗ FAIL'}")
        print(f"Target Time (<5 min): {'✓ PASS' if optimized_sec < 300 else '✗ FAIL'}")

        print(f"\nBaseline Samples: {baseline_samples}")
        print(f"Optimized Samples: {optimized_samples}")

        print("\n" + "=" * 60)

    except (OSError, ValueError, KeyError, TypeError) as e:
        # ValueError also covers json.JSONDecodeError.
        print(f"✗ Error reading performance results file: {e}")
        print("File may be corrupted or incomplete")

else:
    print("⚠️ Performance results file not found!")
    print(f"Expected: {perf_report_path}")

## Cell 9: Final Validation Summary

In [None]:
import json
from pathlib import Path
from datetime import datetime

print("=" * 60)
print("FINAL VALIDATION SUMMARY")
print("=" * 60)
print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("Platform: Kaggle")

# Report files written by the accuracy and performance test runs.
accuracy_report = Path("validation_results/enhanced_ttt_report.json")
perf_report = Path("validation_results/ttt_inference_optimization_report.json")

# Errors that mean a report file is unreadable or malformed
# (ValueError also covers json.JSONDecodeError).
REPORT_ERRORS = (OSError, ValueError, KeyError, TypeError)

all_passed = True

# Defaults so the saved summary is well-defined even when a report is missing
# or cannot be parsed.
acc_pass = time_pass = False
speedup_pass = opt_time_pass = False

print("\n" + "-" * 60)
print("ACCURACY VALIDATION")
print("-" * 60)

if accuracy_report.exists():
    try:
        with open(accuracy_report, encoding="utf-8") as f:
            acc_data = json.load(f)

        acc_summary = acc_data['summary']
        accuracy = acc_summary['accuracy']
        # Per-task time is adaptation plus inference.
        avg_time = (acc_summary['avg_adaptation_time_sec'] +
                    acc_summary['avg_inference_time_sec'])

        acc_pass = accuracy >= 0.58
        time_pass = avg_time < 300

        print(f"Accuracy: {accuracy:.2%} - {'✓ PASS' if acc_pass else '✗ FAIL'} (target: 58%+)")
        print(f"Avg Time: {avg_time:.1f}s - {'✓ PASS' if time_pass else '✗ FAIL'} (target: <300s)")
    except REPORT_ERRORS as e:
        # A broken report counts as a failure but must not stop the summary
        # from being written.
        acc_pass = time_pass = False
        print(f"✗ FAIL - Could not read accuracy report: {e}")

    if not (acc_pass and time_pass):
        all_passed = False
else:
    print("✗ FAIL - No accuracy report found")
    all_passed = False

print("\n" + "-" * 60)
print("PERFORMANCE OPTIMIZATION VALIDATION")
print("-" * 60)

if perf_report.exists():
    try:
        with open(perf_report, encoding="utf-8") as f:
            perf_data = json.load(f)

        speedup_summary = perf_data['speedup_summary']
        speedup_factor = speedup_summary['speedup_factor']
        opt_time = speedup_summary['avg_optimized_sec']

        speedup_pass = speedup_factor >= 1.30
        opt_time_pass = opt_time < 300

        print(f"Speedup: {speedup_factor:.2f}x - {'✓ PASS' if speedup_pass else '✗ FAIL'} (target: 1.30x+)")
        print(f"Opt Time: {opt_time:.1f}s - {'✓ PASS' if opt_time_pass else '✗ FAIL'} (target: <300s)")
    except REPORT_ERRORS as e:
        speedup_pass = opt_time_pass = False
        print(f"✗ FAIL - Could not read performance report: {e}")

    if not (speedup_pass and opt_time_pass):
        all_passed = False
else:
    print("✗ FAIL - No performance report found")
    all_passed = False

print("\n" + "=" * 60)
print(f"OVERALL STATUS: {'✓ ALL TESTS PASSED' if all_passed else '✗ SOME TESTS FAILED'}")
print("=" * 60)

# Save summary. Each section's "passed" flag uses the same pair of checks
# that feeds all_passed, so the saved file agrees with the printed status.
summary = {
    'timestamp': datetime.now().isoformat(),
    'platform': 'kaggle',
    'all_passed': all_passed,
    'accuracy_validation': {
        'exists': accuracy_report.exists(),
        'passed': acc_pass and time_pass
    },
    'performance_validation': {
        'exists': perf_report.exists(),
        'passed': speedup_pass and opt_time_pass
    }
}

summary_path = Path("validation_results/kaggle_validation_summary.json")
summary_path.parent.mkdir(parents=True, exist_ok=True)
with open(summary_path, 'w', encoding="utf-8") as f:
    json.dump(summary, f, indent=2)

print(f"\nSummary saved to: {summary_path}")

## Cell 10: Download Results

Download the validation reports to your local machine.

In [None]:
# Create downloadable archive
import shutil
from pathlib import Path

# Bundle everything under validation_results/ into a single zip file.
results_dir = Path("validation_results")
if not results_dir.exists():
    print("⚠️ No results directory found")
else:
    archive_name = "enhanced_ttt_validation_results"
    shutil.make_archive(archive_name, 'zip', results_dir)
    print(f"✓ Created archive: {archive_name}.zip")

    # In Kaggle, you can download from the output tab
    print("\nTo download results:")
    print("1. Click the 'Output' tab on the right")
    print(f"2. Download '{archive_name}.zip'")
    print("3. Extract to review detailed reports")

# Show each JSON report with its size in KB (bytes / 1024).
print("\nAvailable result files:")
for report_file in results_dir.glob("*.json"):
    print(f"  - {report_file.name} ({report_file.stat().st_size / 1024:.1f} KB)")