# Batch Processing for All Burst Events

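This notebook processes every manually annotated burst event listed in `burst_list_240330_240729.csv` and generates 128×128 training windows (4-minute windows with 50% overlap) for GAN training. The windows are saved as CSV files in a separate output directory for each burst type.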

## Data Overview:
- **Type 2**: 9 events (12%) 
- **Type 3**: 62 events (85%)
- **Type 5**: 2 events (3%)
- **Total**: 73 burst events


In [None]:
import pandas as pd
import os
from batch_processing import process_all_bursts_by_type, load_burst_catalog

# Sanity marker: this line is only reached when every import above resolved,
# because a failed import raises before the cell gets this far.
print("✅ Batch processing imports successful!")


In [None]:
# Configuration
# Every path hangs off a single data root so the notebook can run on another
# machine without editing three separate literals: set the BURST_DATA_DIR
# environment variable to relocate the data. When it is unset, the original
# location is used and the resulting paths are the same as before.
BURST_DATA_DIR = os.environ.get(
    "BURST_DATA_DIR",
    "/Users/remiliascarlet/Desktop/MDP/transfer_learning/burst_data",
)
# Directory that holds the catalog (and is handed to the batch processor as
# the location of the original CSV data).
ORIGINAL_CSV_DIR = os.path.join(BURST_DATA_DIR, "csv", "original")
# Manually annotated burst catalog.
CATALOG_PATH = os.path.join(ORIGINAL_CSV_DIR, "burst_list_240330_240729.csv")
# Root directory passed to the batch processor for the generated windows.
OUTPUT_BASE_DIR = os.path.join(BURST_DATA_DIR, "csv", "gan_training_windows_128")

print("📋 Configuration:")
print(f"   Burst catalog: {os.path.basename(CATALOG_PATH)}")
print(f"   Original CSV dir: {ORIGINAL_CSV_DIR}")
print(f"   Output base dir: {OUTPUT_BASE_DIR}")

# Load and preview burst catalog
# NOTE(review): load_burst_catalog comes from the project's batch_processing
# module; its result is used like a pandas DataFrame here (.head()) — confirm.
burst_df = load_burst_catalog(CATALOG_PATH)
print("\n📄 Sample burst entries:")
print(burst_df.head())


In [None]:
# 🚀 Execute batch processing for all burst types
# Tell the user what is about to happen before the long-running call starts.
for notice in (
    "🚀 Starting batch processing...",
    "This will generate 4-minute windows with 50% overlap for all 73 burst events",
    "⚠️  This may take 10-30 minutes depending on your system",
):
    print(notice)

# Windowing and cleaning options, gathered in one place so they are easy to
# compare against the banner text above.
processing_options = {
    # 4 minutes; presumably expressed in seconds — confirm in batch_processing.
    "window_duration": 4 * 60,
    # Consecutive windows overlap by 50%.
    "overlap_ratio": 0.5,
    # Apply noise removal.
    "apply_denoising": True,
    # Fast mode (noted by the original author as skipping "Step 4" for speed).
    "cleaning_method": "fast",
}

# Run the batch processing; the paths come from the configuration cell.
results = process_all_bursts_by_type(
    catalog_path=CATALOG_PATH,
    original_csv_dir=ORIGINAL_CSV_DIR,
    output_base_dir=OUTPUT_BASE_DIR,
    **processing_options,
)

print("\n🎉 Batch processing completed!")


In [None]:
# 📊 Analyze results and prepare for GAN training
def _training_recommendation(window_count):
    """Return the recommendation text for a type with *window_count* windows."""
    if window_count >= 500:
        return "✅ Excellent for GAN training"
    if window_count >= 200:
        return "✅ Good for GAN training"
    if window_count >= 50:
        return "⚠️  Limited, consider data augmentation"
    return "❌ Too few, combine with others"


print("📊 Analyzing generated training data...")

# Detailed analysis by type. `results` is produced by the batch-processing
# cell; each entry is read through the keys 'successful_bursts',
# 'total_bursts', 'total_windows' and 'output_directory'.
for burst_type, result_info in results.items():
    print(f"\n🔍 Type {burst_type} Analysis:")

    successful = result_info['successful_bursts']
    window_total = result_info['total_windows']
    output_dir = result_info['output_directory']
    # Avoid dividing by zero when no burst of this type was processed.
    avg_windows = window_total / successful if successful > 0 else 0

    print(f"   Successful processing: {successful}/{result_info['total_bursts']} bursts")
    print(f"   Generated windows: {window_total}")
    print(f"   Output directory: {output_dir}")
    print(f"   Average windows per burst: {avg_windows:.1f}")

    # Check directory contents. isdir (rather than exists) keeps os.listdir
    # from raising if the path happens to be a regular file, and sorting makes
    # the sample stable because os.listdir returns entries in arbitrary order.
    if os.path.isdir(output_dir):
        csv_files = sorted(f for f in os.listdir(output_dir) if f.endswith('.csv'))
        print(f"   Files in directory: {len(csv_files)}")
        if csv_files:
            print("   Sample filenames:")
            for filename in csv_files[:3]:
                print(f"     - {filename}")

# Total statistics across every burst type
total_windows = sum(r['total_windows'] for r in results.values())
total_bursts = sum(r['successful_bursts'] for r in results.values())

print("\n🎯 GAN Training Data Ready:")
print(f"   Total windows: {total_windows}")
print(f"   Total bursts: {total_bursts}")
print(f"   Types available: {list(results.keys())}")

print("\n💡 Next Steps:")
print("   1. Train separate GANs for each type (recommended)")
print("   2. Or combine Type 3 + others for mixed training")
print("   3. Use the 128×128 CSV files directly in DCGAN training")

# Recommendations based on data size
print("\n📋 Training Recommendations:")
for burst_type, result_info in results.items():
    windows_count = result_info['total_windows']
    print(f"   Type {burst_type}: {windows_count} windows → {_training_recommendation(windows_count)}")
