# Enhanced TTT Strategy - GPU Validation

This notebook validates the Enhanced TTT Strategy implementation on Kaggle GPU.

**Target Metrics:**
- Accuracy: 58%+ on 100 evaluation tasks
- Inference Time: <5 minutes per task
- Performance Speedup: 30-40% from optimizations (speedup factor of at least 1.30x)

**Runtime: ~6-9 hours total**

---

## Cell 1: Environment Check

In [None]:
import sys
import torch
import os
from pathlib import Path

# Report what hardware and platform this kernel is running on before any
# long-running validation work is started.
divider = "=" * 60
print(divider)
print("KAGGLE ENVIRONMENT CHECK")
print(divider)

# Query the CUDA runtime once and reuse the answers below
cuda_available = torch.cuda.is_available()
gpu_count = torch.cuda.device_count()
print(f"CUDA Available: {cuda_available}")
print(f"GPU Count: {gpu_count}")

if not cuda_available:
    # Without a GPU the remaining cells are not worth starting; tell the user how to fix it
    print("\n⚠️ WARNING: No GPU detected! Enable GPU in notebook settings.")
    print("Settings → Accelerator → GPU T4 x2")
else:
    bytes_per_gib = 1024**3
    for device_index in range(gpu_count):
        device_name = torch.cuda.get_device_name(device_index)
        device_gib = torch.cuda.get_device_properties(device_index).total_memory / bytes_per_gib
        print(f"GPU {device_index}: {device_name} ({device_gib:.1f} GB)")

# Kaggle-specific directories and interpreter version
working_dir_present = os.path.exists('/kaggle/working')
input_dir_present = os.path.exists('/kaggle/input')
python_version = sys.version.split()[0]
print(f"\nKaggle Working Dir: {working_dir_present}")
print(f"Kaggle Input Dir: {input_dir_present}")
print(f"Python Version: {python_version}")
print(divider)

## Cell 2: Clone Repository & Setup

In [None]:
# Clone the repository
REPO_URL = "https://github.com/18h32n/shockabo.git"

print(f"Cloning repository from: {REPO_URL}")

# Option A: Clone from GitHub (if repo is public)
try:
    !git clone -b master {REPO_URL} /kaggle/working/arc-prize-2025
    print("✓ Repository cloned successfully")
except Exception as e:
    print(f"✗ Clone failed: {e}")
    print("Checking if directory already exists...")
    import os
    if os.path.exists("/kaggle/working/arc-prize-2025"):
        print("Directory exists, proceeding...")
    else:
        print("Alternative: Upload code as Kaggle dataset")
        raise

# Option B: If you uploaded code as Kaggle dataset, uncomment:
# !cp -r /kaggle/input/arc-prize-2025-code/* /kaggle/working/arc-prize-2025/

# Change to project directory
%cd /kaggle/working/arc-prize-2025

# Verify we're in the right place
!pwd
!ls -la

# Run Kaggle setup script
print("Running Kaggle setup script...")
try:
    !python scripts/platform_deploy/kaggle_setup.py
    print("✓ Setup script completed")
except Exception as e:
    print(f"⚠️ Setup script issue: {e}")
    print("Proceeding anyway...")

## Cell 3: Install Additional Dependencies

In [None]:
# Install test dependencies
!pip install -q pytest pytest-asyncio pytest-timeout pytest-mock

# Install transformers and related packages
!pip install -q transformers>=4.35.0 accelerate>=0.25.0 bitsandbytes>=0.41.0 peft>=0.7.0

# Verify installations
import pytest
import transformers
import accelerate
import peft

print(f"pytest: {pytest.__version__}")
print(f"transformers: {transformers.__version__}")
print(f"accelerate: {accelerate.__version__}")
print(f"peft: {peft.__version__}")
print("\n✓ All dependencies installed successfully!")

## Cell 4: Verify ARC Dataset

In [None]:
# Check for ARC dataset in Kaggle input
import json
from pathlib import Path

import shutil

CHALLENGES_FILE = "arc-agi_evaluation_challenges.json"
SOLUTIONS_FILE = "arc-agi_evaluation_solutions.json"

# Common locations for ARC dataset in Kaggle, in priority order
dataset_locations = [
    "/kaggle/input/arc-prize-2025",
    "/kaggle/input/abstraction-and-reasoning-corpus",
    "/kaggle/input/arc-agi",
    "arc-prize-2025/data/downloaded"  # Local copy
]


def find_arc_dataset(locations):
    """Locate the first directory that holds both evaluation files.

    Every existing location is searched recursively; a directory that exists
    but lacks either file does not stop the search.

    Args:
        locations: directory paths to probe, in priority order.

    Returns:
        A tuple ``(dataset_dir, challenges_path, solutions_path, existing_dirs)``.
        The first three items are None when no location contains both files.
        ``existing_dirs`` lists the locations that exist and were searched.
    """
    existing_dirs = []
    for loc in locations:
        candidate = Path(loc)
        if not candidate.exists():
            continue
        existing_dirs.append(candidate)
        # Sorted so the chosen file is deterministic when several copies exist
        challenge_hits = sorted(candidate.glob(f"**/{CHALLENGES_FILE}"))
        solution_hits = sorted(candidate.glob(f"**/{SOLUTIONS_FILE}"))
        if challenge_hits and solution_hits:
            return candidate, challenge_hits[0], solution_hits[0], existing_dirs
    return None, None, None, existing_dirs


def copy_dataset_file(src, dst):
    """Copy ``src`` to ``dst`` unless both already refer to the same file.

    Copying a file onto itself makes ``shutil.copy`` raise ``SameFileError``,
    which happens when the dataset was found in the local copy directory.

    Returns:
        True if a copy was made, False if it was skipped.
    """
    src, dst = Path(src), Path(dst)
    if dst.exists() and src.samefile(dst):
        return False
    shutil.copy(src, dst)
    return True


dataset_path, eval_path, solution_path, existing_dirs = find_arc_dataset(dataset_locations)

if dataset_path:
    print(f"Found dataset at: {dataset_path}")

    # Load and verify
    with open(eval_path) as f:
        challenges = json.load(f)
    with open(solution_path) as f:
        solutions = json.load(f)

    print("\n✓ Dataset verified!")
    print(f"  Challenges: {len(challenges)} tasks")
    print(f"  Solutions: {len(solutions)} tasks")
    print(f"  Evaluation path: {eval_path}")
    print(f"  Solutions path: {solution_path}")

    # Copy to expected location.
    # NOTE(review): this path is relative to the current working directory; Cell 2
    # has already changed into /kaggle/working/arc-prize-2025, so it resolves to a
    # nested arc-prize-2025/ directory — confirm this is where the tests look.
    expected_dir = Path("arc-prize-2025/data/downloaded")
    expected_dir.mkdir(parents=True, exist_ok=True)

    copy_dataset_file(eval_path, expected_dir / CHALLENGES_FILE)
    copy_dataset_file(solution_path, expected_dir / SOLUTIONS_FILE)
    print(f"\n✓ Dataset copied to: {expected_dir}")
elif existing_dirs:
    searched = ", ".join(str(d) for d in existing_dirs)
    print(f"\n⚠️ Dataset files not found in {searched}")
else:
    print("\n⚠️ ARC dataset not found!")
    print("Please add ARC dataset to your notebook:")
    print("  1. Click '+ Add Data' button")
    print("  2. Search for 'ARC Prize 2025' or 'abstraction reasoning'")
    print("  3. Click 'Add' and re-run this cell")

## Cell 5: Run Enhanced TTT Accuracy Validation

**This will take 4-6 hours to complete**

Tests 100 evaluation tasks with:
- Leave-one-out generation
- Self-consistency validation
- LoRA optimization
- Memory efficient batch processing

In [None]:
# Run enhanced TTT accuracy test
import subprocess
import sys
from pathlib import Path

# Launch the long-running accuracy test in a child pytest process and report
# how it ended. Output is streamed, not captured, so progress stays visible.
print("Starting Enhanced TTT Accuracy Validation...")
print("This will take 4-6 hours to complete")
print("=" * 60)

PYTEST_TIMEOUT_SEC = 25200  # 7 hour per-test limit enforced by pytest-timeout
# The child process is given a little longer than the pytest limit; with equal
# values the process would be killed before pytest-timeout could report.
KILL_GRACE_SEC = 300

# Check if test file exists
test_file = Path("tests/integration/test_enhanced_ttt_accuracy.py")
if not test_file.exists():
    print(f"✗ Test file not found: {test_file}")
    print("Available test files:")
    for f in sorted(Path("tests").rglob("*.py")):
        print(f"  - {f}")
    # Raise instead of sys.exit(): stops this cell (and a Run All) with a clear error
    raise FileNotFoundError(f"Test file not found: {test_file}")

try:
    # Run the test with the kernel's interpreter so the packages installed above are used
    result = subprocess.run([
        sys.executable, "-m", "pytest",
        f"{test_file.as_posix()}::test_enhanced_ttt_accuracy",
        "-v", "-s", "--no-header", "--tb=short",
        f"--timeout={PYTEST_TIMEOUT_SEC}",
    ], timeout=PYTEST_TIMEOUT_SEC + KILL_GRACE_SEC)

    if result.returncode == 0:
        print("\n✓ Enhanced TTT Accuracy Test PASSED")
    else:
        print(f"\n⚠️ Enhanced TTT Accuracy Test returned code: {result.returncode}")
        print("Check logs above for details")

except subprocess.TimeoutExpired:
    print("\n⚠️ Test timed out after 7 hours")
except Exception as e:
    # Best effort: report launch problems but let the analysis cells run
    print(f"\n✗ Test execution failed: {e}")

print("\n" + "="*60)
print("ENHANCED TTT ACCURACY TEST COMPLETE")
print("="*60)

## Cell 6: Analyze Accuracy Results

In [None]:
import json
from pathlib import Path

# Pretty-print the accuracy report written by the accuracy validation run, or
# explain what is missing when the report cannot be found.
rule = "=" * 60
print(rule)
print("ENHANCED TTT ACCURACY RESULTS")
print(rule)

report_path = Path("validation_results/enhanced_ttt_report.json")

if report_path.exists():
    try:
        with report_path.open() as report_file:
            report = json.load(report_file)

        summary = report.get('summary', {})

        total_tasks = summary.get('total_tasks', 'N/A')
        correct_tasks = summary.get('correct_tasks', 'N/A')
        print(f"\nTotal Tasks: {total_tasks}")
        print(f"Correct Tasks: {correct_tasks}")

        # Accuracy against the 58% target
        accuracy = summary.get('accuracy', 0)
        print(f"\nAccuracy: {accuracy:.2%}")
        accuracy_verdict = '✓ PASS' if accuracy >= 0.58 else '✗ FAIL'
        print(f"Target (58%): {accuracy_verdict}")

        # Per-task timing; missing entries are shown as zero
        adaptation_sec = summary.get('avg_adaptation_time_sec', 0)
        inference_sec = summary.get('avg_inference_time_sec', 0)
        print(f"\nAverage Confidence: {summary.get('avg_confidence', 0):.3f}")
        print(f"Average Adaptation Time: {adaptation_sec:.1f}s")
        print(f"Average Inference Time: {inference_sec:.1f}s")

        total_avg_time = adaptation_sec + inference_sec
        print(f"\nTotal Average Time: {total_avg_time:.1f}s ({total_avg_time/60:.1f} min)")
        time_verdict = '✓ PASS' if total_avg_time < 300 else '✗ FAIL'
        print(f"Time Target (<5 min): {time_verdict}")

        runtime_hours = summary.get('total_time_sec', 0) / 3600
        print(f"\nTotal Runtime: {runtime_hours:.2f} hours")

        # Difficulty breakdown (only when the report provides one)
        if 'difficulty_breakdown' in report:
            print("\n" + rule)
            print("DIFFICULTY BREAKDOWN")
            print(rule)
            for level, level_stats in report['difficulty_breakdown'].items():
                print(f"\n{level.upper()}:")
                print(f"  Tasks: {level_stats.get('total', 'N/A')}")
                print(f"  Correct: {level_stats.get('correct', 'N/A')}")
                print(f"  Accuracy: {level_stats.get('accuracy', 0):.2%}")
                print(f"  Avg Confidence: {level_stats.get('avg_confidence', 0):.3f}")
                print(f"  Avg Time: {level_stats.get('avg_time', 0):.1f}s")

        print("\n" + rule)

    except Exception as read_error:
        print(f"✗ Error reading results file: {read_error}")
        print("File may be corrupted or incomplete")

else:
    print("⚠️ Results file not found!")
    print(f"Expected: {report_path}")

    # Try to find alternative result files
    results_dir = Path("validation_results")
    if not results_dir.exists():
        print("No results directory found")
    else:
        print("\nAvailable result files:")
        for json_file in results_dir.glob("*.json"):
            print(f"  - {json_file}")

    # Check if test ran at all
    log_present = Path("test_output.log").exists()
    if log_present:
        print("\nTest appears to have run, check logs for issues")
    else:
        print("\nTest may not have completed successfully")

## Cell 7: Run Performance Optimization Benchmarks

**This will take 2-3 hours to complete**

Benchmarks optimization techniques:
- KV-cache optimization
- Static cache
- torch.compile
- Combined speedup measurement

In [None]:
# Run performance optimization test.
# The benchmark is launched with the kernel's own interpreter (the same way the
# accuracy run in Cell 5 is launched) and its exit code is reported, so a failed
# run is not silently followed by a "COMPLETE" banner.
import subprocess
import sys

PERF_TEST_ID = "tests/performance/test_ttt_inference_time.py::test_combined_optimizations"
PERF_TIMEOUT_SEC = 14400  # 4 hour timeout, enforced by pytest-timeout

perf_run = subprocess.run([
    sys.executable, "-m", "pytest",
    PERF_TEST_ID,
    "-v", "-s", "--no-header", "--tb=short",
    f"--timeout={PERF_TIMEOUT_SEC}",
])

if perf_run.returncode == 0:
    print("\n✓ Performance Optimization Test PASSED")
else:
    print(f"\n⚠️ Performance Optimization Test returned code: {perf_run.returncode}")
    print("Check logs above for details")

print("\n" + "="*60)
print("PERFORMANCE OPTIMIZATION TEST COMPLETE")
print("="*60)

## Cell 8: Analyze Performance Results

In [None]:
import json
from pathlib import Path

# Display the speedup figures from the optimization benchmark report and
# compare them with the 1.30x / 5 minute targets.
perf_report_path = Path("validation_results/ttt_inference_optimization_report.json")

if not perf_report_path.exists():
    print("⚠️ Performance results file not found!")
    print(f"Expected: {perf_report_path}")
else:
    with perf_report_path.open() as report_file:
        perf_report = json.load(report_file)

    speedup = perf_report['speedup_summary']

    rule = "=" * 60
    print(rule)
    print("PERFORMANCE OPTIMIZATION RESULTS")
    print(rule)

    # Raw timings
    baseline_sec = speedup['avg_baseline_sec']
    print(f"\nBaseline Average Time: {baseline_sec:.1f}s")
    optimized_sec = speedup['avg_optimized_sec']
    print(f"Optimized Average Time: {optimized_sec:.1f}s")

    # Relative improvement
    measured_speedup = speedup['speedup_factor']
    print(f"\nSpeedup Factor: {measured_speedup:.2f}x")
    print(f"Time Reduction: {speedup['reduction_pct']:.1f}%")

    # Verdicts against the targets
    speedup_verdict = '✓ PASS' if measured_speedup >= 1.30 else '✗ FAIL'
    time_verdict = '✓ PASS' if optimized_sec < 300 else '✗ FAIL'
    print(f"\nTarget Speedup (1.30x): {speedup_verdict}")
    print(f"Target Time (<5 min): {time_verdict}")

    print(f"\nBaseline Samples: {speedup['baseline_samples']}")
    print(f"Optimized Samples: {speedup['optimized_samples']}")

    print("\n" + rule)

## Cell 9: Final Validation Summary

In [None]:
import json
from pathlib import Path
from datetime import datetime

# Final gate of the notebook: re-read both validation reports, compare them with
# the targets, print one verdict per section and save a machine-readable summary.
ACCURACY_TARGET = 0.58   # minimum fraction of tasks solved
TIME_TARGET_SEC = 300    # maximum average seconds per task (5 minutes)
SPEEDUP_TARGET = 1.30    # minimum speedup factor


def verdict_label(passed):
    """Return the PASS/FAIL marker used in the printed report."""
    return '✓ PASS' if passed else '✗ FAIL'


def evaluate_accuracy(acc_summary):
    """Score the ``summary`` section of the accuracy report.

    Args:
        acc_summary: mapping with ``accuracy``, ``avg_adaptation_time_sec`` and
            ``avg_inference_time_sec`` entries.

    Returns:
        dict with ``accuracy``, ``avg_time`` (adaptation + inference),
        ``acc_pass``, ``time_pass`` and ``passed``; ``passed`` is True only
        when both the accuracy and the time target are met.

    Raises:
        KeyError: if a required entry is missing.
    """
    accuracy = acc_summary['accuracy']
    avg_time = acc_summary['avg_adaptation_time_sec'] + acc_summary['avg_inference_time_sec']
    acc_pass = accuracy >= ACCURACY_TARGET
    time_pass = avg_time < TIME_TARGET_SEC
    return {
        'accuracy': accuracy,
        'avg_time': avg_time,
        'acc_pass': acc_pass,
        'time_pass': time_pass,
        'passed': acc_pass and time_pass,
    }


def evaluate_performance(speedup_summary):
    """Score the ``speedup_summary`` section of the optimization report.

    Args:
        speedup_summary: mapping with ``speedup_factor`` and
            ``avg_optimized_sec`` entries.

    Returns:
        dict with ``speedup_factor``, ``opt_time``, ``speedup_pass``,
        ``opt_time_pass`` and ``passed`` (True only when both targets are met).

    Raises:
        KeyError: if a required entry is missing.
    """
    speedup_factor = speedup_summary['speedup_factor']
    opt_time = speedup_summary['avg_optimized_sec']
    speedup_pass = speedup_factor >= SPEEDUP_TARGET
    opt_time_pass = opt_time < TIME_TARGET_SEC
    return {
        'speedup_factor': speedup_factor,
        'opt_time': opt_time,
        'speedup_pass': speedup_pass,
        'opt_time_pass': opt_time_pass,
        'passed': speedup_pass and opt_time_pass,
    }


print("=" * 60)
print("FINAL VALIDATION SUMMARY")
print("=" * 60)
print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("Platform: Kaggle")

# Check accuracy results
accuracy_report = Path("validation_results/enhanced_ttt_report.json")
perf_report = Path("validation_results/ttt_inference_optimization_report.json")

# Existence is sampled once so the printed verdicts and the saved summary agree
accuracy_exists = accuracy_report.exists()
perf_exists = perf_report.exists()

# A section only passes when its report was read and every target was met
accuracy_passed = False
performance_passed = False

print("\n" + "-" * 60)
print("ACCURACY VALIDATION")
print("-" * 60)

if accuracy_exists:
    try:
        with open(accuracy_report) as f:
            acc_data = json.load(f)
        acc_result = evaluate_accuracy(acc_data['summary'])
    except (ValueError, KeyError, TypeError) as e:
        # A truncated or malformed report counts as a failure, not a crash,
        # so the summary file below is still written
        print(f"✗ FAIL - Accuracy report unreadable or incomplete: {e!r}")
    else:
        accuracy_passed = acc_result['passed']
        print(f"Accuracy: {acc_result['accuracy']:.2%} - {verdict_label(acc_result['acc_pass'])} (target: 58%+)")
        print(f"Avg Time: {acc_result['avg_time']:.1f}s - {verdict_label(acc_result['time_pass'])} (target: <300s)")
else:
    print("✗ FAIL - No accuracy report found")

print("\n" + "-" * 60)
print("PERFORMANCE OPTIMIZATION VALIDATION")
print("-" * 60)

if perf_exists:
    try:
        with open(perf_report) as f:
            perf_data = json.load(f)
        perf_result = evaluate_performance(perf_data['speedup_summary'])
    except (ValueError, KeyError, TypeError) as e:
        print(f"✗ FAIL - Performance report unreadable or incomplete: {e!r}")
    else:
        performance_passed = perf_result['passed']
        print(f"Speedup: {perf_result['speedup_factor']:.2f}x - {verdict_label(perf_result['speedup_pass'])} (target: 1.30x+)")
        print(f"Opt Time: {perf_result['opt_time']:.1f}s - {verdict_label(perf_result['opt_time_pass'])} (target: <300s)")
else:
    print("✗ FAIL - No performance report found")

all_passed = accuracy_passed and performance_passed

print("\n" + "=" * 60)
print(f"OVERALL STATUS: {'✓ ALL TESTS PASSED' if all_passed else '✗ SOME TESTS FAILED'}")
print("=" * 60)

# Save summary. Each section's 'passed' uses the same criteria as all_passed
# (for accuracy: both the accuracy target and the time target).
summary = {
    'timestamp': datetime.now().isoformat(),
    'platform': 'kaggle',
    'all_passed': all_passed,
    'accuracy_validation': {
        'exists': accuracy_exists,
        'passed': accuracy_passed
    },
    'performance_validation': {
        'exists': perf_exists,
        'passed': performance_passed
    }
}

summary_path = Path("validation_results/kaggle_validation_summary.json")
summary_path.parent.mkdir(parents=True, exist_ok=True)
with open(summary_path, 'w') as f:
    json.dump(summary, f, indent=2)

print(f"\nSummary saved to: {summary_path}")

## Cell 10: Download Results

Download the validation reports to your local machine from the notebook's Output tab.

In [None]:
# Create downloadable archive
import shutil
from pathlib import Path

# Bundle the contents of the results directory into one zip and explain how to
# fetch it from Kaggle.
results_dir = Path("validation_results")
if not results_dir.exists():
    print("⚠️ No results directory found")
else:
    archive_name = "enhanced_ttt_validation_results"
    shutil.make_archive(archive_name, 'zip', results_dir)
    print(f"✓ Created archive: {archive_name}.zip")

    # In Kaggle, you can download from the output tab
    download_steps = (
        "\nTo download results:",
        "1. Click the 'Output' tab on the right",
        f"2. Download '{archive_name}.zip'",
        "3. Extract to review detailed reports",
    )
    for step in download_steps:
        print(step)

# List each JSON report together with its size in KB
print("\nAvailable result files:")
for report_file in results_dir.glob("*.json"):
    report_kb = report_file.stat().st_size / 1024
    print(f"  - {report_file.name} ({report_kb:.1f} KB)")