In [2]:
#!/usr/bin/env python3
"""
gc_specific_calibration.py

Deriva y aplica correcciones específicas para cúmulos globulares
usando el catálogo de Taylor como referencia directa.
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

def analyze_current_offsets(taylor_df, aperture=3):
    """
    Measure and plot the current magnitude offsets between SPLUS and Taylor.

    For every (SPLUS filter, Taylor band) pair in the internal table the
    difference ``SPLUS - Taylor`` is computed over the rows where both
    magnitudes are non-null, finite and strictly between 10 and 90. A pair is
    skipped when either column is missing from ``taylor_df`` or when fewer
    than 10 rows pass that selection.

    Parameters
    ----------
    taylor_df : pandas.DataFrame
        Table containing the SPLUS columns ``MAG_<filter>_<aperture>`` and the
        Taylor columns ``umag``, ``gmag``, ``rmag`` and ``imag``.
    aperture : int, optional
        Suffix of the SPLUS magnitude columns to analyse (default 3).

    Returns
    -------
    pandas.DataFrame
        One row per analysed pair with the columns ``splus_filter``,
        ``taylor_filter``, ``n_sources``, ``median_offset``, ``mean_offset``,
        ``std_offset``, ``mad_offset``, ``taylor_range`` and ``splus_range``.
        The frame is empty when no pair qualified.

    Side effects
    ------------
    Prints a textual summary and writes ``current_offsets_analysis.png`` to
    the current working directory.
    """
    print("📊 ANALIZANDO OFFSETS ACTUALES SPLUS vs TAYLOR")
    print("="*70)

    # (SPLUS filter base name, Taylor column, panel title). The aperture
    # suffix is appended below so that the `aperture` argument is honoured.
    filter_correspondences = [
        ('MAG_F378', 'umag', 'F378 vs u-band'),
        ('MAG_F395', 'umag', 'F395 vs u-band'),
        ('MAG_F410', 'gmag', 'F410 vs g-band'),
        ('MAG_F430', 'gmag', 'F430 vs g-band'),
        ('MAG_F515', 'gmag', 'F515 vs g-band'),
        ('MAG_F660', 'rmag', 'F660 vs r-band'),
        ('MAG_F861', 'imag', 'F861 vs i-band')
    ]

    offsets_analysis = []

    # Figure holding one panel per analysed filter pair
    fig, axes = plt.subplots(3, 3, figsize=(15, 12))
    axes = axes.ravel()

    # Count the panels actually drawn so skipped filters leave no blank,
    # visible axes in the middle of the grid.
    n_plotted = 0

    for splus_filter, taylor_col, title in filter_correspondences:
        if n_plotted >= len(axes):
            break

        splus_col = f'{splus_filter}_{aperture}'
        if splus_col not in taylor_df.columns or taylor_col not in taylor_df.columns:
            continue

        # Keep only rows with usable magnitudes in both catalogues
        valid_mask = (
            taylor_df[splus_col].notna() &
            taylor_df[taylor_col].notna() &
            np.isfinite(taylor_df[splus_col]) &
            np.isfinite(taylor_df[taylor_col]) &
            (taylor_df[splus_col] < 90) &
            (taylor_df[taylor_col] < 90) &
            (taylor_df[splus_col] > 10) &
            (taylor_df[taylor_col] > 10)
        )

        if valid_mask.sum() < 10:
            continue

        splus_mags = taylor_df.loc[valid_mask, splus_col]
        taylor_mags = taylor_df.loc[valid_mask, taylor_col]
        differences = splus_mags - taylor_mags

        # Summary statistics of the offset distribution
        median_diff = np.median(differences)
        mean_diff = np.mean(differences)
        std_diff = np.std(differences)
        mad_diff = np.median(np.abs(differences - median_diff))

        offsets_analysis.append({
            'splus_filter': splus_filter,
            'taylor_filter': taylor_col,
            'n_sources': len(differences),
            'median_offset': median_diff,
            'mean_offset': mean_diff,
            'std_offset': std_diff,
            'mad_offset': mad_diff,
            'taylor_range': f"{taylor_mags.min():.1f}-{taylor_mags.max():.1f}",
            'splus_range': f"{splus_mags.min():.1f}-{splus_mags.max():.1f}"
        })

        # Offset versus Taylor magnitude, coloured by the offset itself
        ax = axes[n_plotted]
        n_plotted += 1
        sc = ax.scatter(taylor_mags, differences, alpha=0.6, s=20, c=differences, cmap='coolwarm')
        ax.axhline(median_diff, color='red', linestyle='--', label=f'Mediana: {median_diff:.3f}')
        ax.axhline(0, color='black', linestyle='-', alpha=0.3)

        ax.set_xlabel(f'Taylor {taylor_col}')
        ax.set_ylabel('Δ (SPLUS - Taylor)')
        ax.set_title(f'{title}\nMediana: {median_diff:.3f} ± {mad_diff:.3f}')
        ax.legend(fontsize=8)
        ax.grid(True, alpha=0.3)

        fig.colorbar(sc, ax=ax, label='Δ (SPLUS - Taylor)')

    # Hide every panel that received no data
    for unused_ax in axes[n_plotted:]:
        unused_ax.set_visible(False)

    fig.suptitle('Análisis de Offsets Actuales - SPLUS vs Taylor', fontsize=16)
    fig.tight_layout()
    fig.savefig('current_offsets_analysis.png', dpi=300, bbox_inches='tight')
    # Close this specific figure rather than whichever one is "current"
    plt.close(fig)

    # Tabular summary
    offsets_df = pd.DataFrame(offsets_analysis)
    print("\n📋 RESUMEN DE OFFSETS ACTUALES:")
    print("="*70)
    print(offsets_df.to_string(index=False))

    return offsets_df

def derive_simple_gc_corrections(offsets_df):
    """
    Convert measured median offsets into additive per-filter corrections.

    Each row of ``offsets_df`` produces one correction equal to the negated
    ``median_offset``, so that adding it to the SPLUS magnitude removes the
    median difference with respect to the reference band.

    Parameters
    ----------
    offsets_df : pandas.DataFrame
        Must provide the columns ``splus_filter``, ``taylor_filter``,
        ``median_offset``, ``n_sources`` and ``mad_offset``.

    Returns
    -------
    pandas.DataFrame
        One row per input row with the columns ``splus_filter``,
        ``taylor_filter``, ``median_offset``, ``simple_correction``,
        ``correction_equation``, ``n_sources`` and ``mad_original``.
        A progress line is printed for every filter.
    """
    print("\n🎯 DERIVANDO CORRECCIONES SIMPLES PARA CÚMULOS GLOBULARES")
    print("="*70)

    entries = []

    for record in offsets_df.to_dict('records'):
        name = record['splus_filter']
        offset = record['median_offset']

        # corrected magnitude = original magnitude - median offset
        shift = -offset

        entries.append({
            'splus_filter': name,
            'taylor_filter': record['taylor_filter'],
            'median_offset': offset,
            'simple_correction': shift,
            'correction_equation': f'{name}_corrected = {name} + {shift:.4f}',
            'n_sources': record['n_sources'],
            'mad_original': record['mad_offset']
        })

        print(f"✅ {name}: Corrección = {shift:.4f} (offset mediano: {offset:.4f})")

    return pd.DataFrame(entries)

def apply_simple_gc_corrections(taylor_df, corrections_df, aperture=3):
    """
    Add corrected-magnitude columns to a copy of the photometry table.

    For every correction whose ``<filter>_<aperture>`` column exists, a new
    column ``<filter>_simple_corrected_<aperture>`` is created by adding the
    ``simple_correction`` value. The same shift is also applied to the
    aperture 4, 5 and 6 columns of that filter when they are present.
    Filters lacking the ``<filter>_<aperture>`` column are skipped entirely.

    Parameters
    ----------
    taylor_df : pandas.DataFrame
        Input table; it is not modified.
    corrections_df : pandas.DataFrame
        Needs the columns ``splus_filter`` and ``simple_correction``.
    aperture : int, optional
        Suffix of the primary magnitude column (default 3).

    Returns
    -------
    pandas.DataFrame
        Copy of ``taylor_df`` with the corrected columns appended.
    """
    print("\n🔧 APLICANDO CORRECCIONES SIMPLES")
    print("="*70)

    result = taylor_df.copy()
    extra_apertures = (4, 5, 6)

    for record in corrections_df.to_dict('records'):
        base_name = record['splus_filter']
        source_col = f'{base_name}_{aperture}'

        if source_col not in result.columns:
            continue

        shift = record['simple_correction']

        # Primary aperture
        target_col = f'{base_name}_simple_corrected_{aperture}'
        result[target_col] = result[source_col] + shift

        # Propagate the same shift to the additional apertures that exist
        for extra in extra_apertures:
            extra_source = f'{base_name}_{extra}'
            if extra_source not in result.columns:
                continue
            result[f'{base_name}_simple_corrected_{extra}'] = result[extra_source] + shift

        print(f"✅ {source_col} -> {target_col} (corrección: {shift:.4f})")

    return result

def _valid_magnitude_pairs(df, mag_col, ref_col, low=10, high=90):
    """Return a boolean mask of rows whose two magnitudes are non-null, finite and strictly inside (low, high)."""
    mags = df[mag_col]
    refs = df[ref_col]
    return (
        mags.notna() &
        refs.notna() &
        np.isfinite(mags) &
        np.isfinite(refs) &
        (mags < high) &
        (refs < high) &
        (mags > low) &
        (refs > low)
    )


def evaluate_simple_corrections(original_df, corrected_df, corrections_df, aperture=3):
    """
    Compare SPLUS-minus-Taylor residuals before and after the correction.

    For every row of ``corrections_df`` the residuals are computed from the
    original column ``<filter>_<aperture>`` in ``original_df`` and from the
    corrected column ``<filter>_simple_corrected_<aperture>`` in
    ``corrected_df``. Rows are selected with the same criteria as the offset
    analysis (non-null, finite, strictly between 10 and 90 in both
    catalogues), so the evaluation is not contaminated by values that the
    derivation of the correction ignored. A filter is skipped when a required
    column is missing or when fewer than 5 rows remain on either side.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Table with the uncorrected SPLUS columns and the Taylor columns.
    corrected_df : pandas.DataFrame
        Table with the corrected SPLUS columns and the Taylor columns.
    corrections_df : pandas.DataFrame
        Needs the columns ``splus_filter``, ``taylor_filter`` and
        ``simple_correction``.
    aperture : int, optional
        Aperture suffix of the columns to evaluate (default 3).

    Returns
    -------
    pandas.DataFrame
        One row per evaluated filter with the columns ``filter``,
        ``taylor_filter``, ``n_original``, ``n_corrected``,
        ``median_original``, ``median_corrected``, ``mad_original``,
        ``mad_corrected``, ``improvement`` (MAD before minus MAD after),
        ``offset_reduction`` (absolute median before minus after) and
        ``applied_correction``. Empty when nothing could be evaluated.

    Side effects
    ------------
    Writes ``simple_corrections_evaluation.png`` to the current working
    directory, even when no filter could be evaluated.
    """
    print("\n📈 EVALUANDO CORRECCIONES SIMPLES")
    print("="*70)

    results = []

    # Comparison figure: one panel per evaluated filter
    fig, axes = plt.subplots(3, 3, figsize=(15, 12))
    axes = axes.ravel()

    plot_count = 0

    for _, correction in corrections_df.iterrows():
        if plot_count >= len(axes):
            break

        splus_filter = correction['splus_filter']
        splus_col = f'{splus_filter}_{aperture}'
        corrected_col = f'{splus_filter}_simple_corrected_{aperture}'
        taylor_col = correction['taylor_filter']

        # Skip instead of raising KeyError when any needed column is absent
        if (
            splus_col not in original_df.columns
            or corrected_col not in corrected_df.columns
            or taylor_col not in original_df.columns
            or taylor_col not in corrected_df.columns
        ):
            continue

        valid_original = _valid_magnitude_pairs(original_df, splus_col, taylor_col)
        valid_corrected = _valid_magnitude_pairs(corrected_df, corrected_col, taylor_col)

        if valid_original.sum() < 5 or valid_corrected.sum() < 5:
            continue

        taylor_original = original_df.loc[valid_original, taylor_col]
        taylor_corrected = corrected_df.loc[valid_corrected, taylor_col]
        splus_original = original_df.loc[valid_original, splus_col]
        splus_corrected = corrected_df.loc[valid_corrected, corrected_col]

        # Residuals with respect to Taylor
        diff_original = splus_original - taylor_original
        diff_corrected = splus_corrected - taylor_corrected

        # Median and median absolute deviation before/after
        med_orig = np.median(diff_original)
        med_corr = np.median(diff_corrected)
        mad_orig = np.median(np.abs(diff_original - med_orig))
        mad_corr = np.median(np.abs(diff_corrected - med_corr))

        results.append({
            'filter': splus_filter,
            'taylor_filter': taylor_col,
            'n_original': len(diff_original),
            'n_corrected': len(diff_corrected),
            'median_original': med_orig,
            'median_corrected': med_corr,
            'mad_original': mad_orig,
            'mad_corrected': mad_corr,
            'improvement': mad_orig - mad_corr,
            'offset_reduction': abs(med_orig) - abs(med_corr),
            'applied_correction': correction['simple_correction']
        })

        ax = axes[plot_count]

        # Original magnitudes (red)
        ax.scatter(taylor_original, splus_original,
                   alpha=0.4, s=15, c='red', label=f'Original: Δ={med_orig:.3f}')

        # Corrected magnitudes (blue)
        ax.scatter(taylor_corrected, splus_corrected,
                   alpha=0.6, s=15, c='blue', label=f'Corregido: Δ={med_corr:.3f}')

        # 1:1 reference line spanning both samples
        x_range = np.linspace(
            min(taylor_original.min(), taylor_corrected.min()),
            max(taylor_original.max(), taylor_corrected.max()),
            100
        )
        ax.plot(x_range, x_range, 'k--', alpha=0.5, label='1:1')

        ax.set_xlabel(f'Taylor {taylor_col}')
        ax.set_ylabel(f'SPLUS {splus_filter}')
        ax.set_title(f'{splus_filter}\nMAD: {mad_orig:.3f}→{mad_corr:.3f}')
        ax.legend(fontsize=8)
        ax.grid(True, alpha=0.3)

        plot_count += 1

    # Hide the panels that were not used
    for unused_ax in axes[plot_count:]:
        unused_ax.set_visible(False)

    fig.suptitle('Comparación - Correcciones Simples vs Original', fontsize=16)
    fig.tight_layout()
    fig.savefig('simple_corrections_evaluation.png', dpi=300, bbox_inches='tight')
    # Close this specific figure rather than whichever one is "current"
    plt.close(fig)

    return pd.DataFrame(results)

def create_final_recommendations(results_df, corrections_df, mad_tolerance=1e-6):
    """
    Print a verdict and the recommended correction equations.

    A constant additive shift moves the median of the residuals but leaves
    their median absolute deviation (MAD) unchanged apart from rounding, so
    the ``improvement`` column is expected to be ~0. A correction is therefore
    reported as problematic only when the MAD got worse by more than
    ``mad_tolerance``, and the set of corrections is declared effective when
    the mean MAD did not worsen beyond that tolerance and the mean absolute
    offset decreased.

    Parameters
    ----------
    results_df : pandas.DataFrame
        Needs the columns ``filter``, ``improvement`` and
        ``offset_reduction``; ``applied_correction`` is used as a fallback
        for the printed equations when present.
    corrections_df : pandas.DataFrame
        Source of the printed correction values, looked up through the
        columns ``splus_filter`` and ``simple_correction`` (first match wins).
    mad_tolerance : float, optional
        Largest MAD degradation, in magnitudes, still treated as "unchanged"
        (default 1e-6).

    Returns
    -------
    None
        Everything is reported on standard output. Returns early when
        ``results_df`` is empty.
    """
    print("\n💡 RECOMENDACIONES FINALES")
    print("="*70)

    if results_df.empty:
        print("❌ No hay resultados para analizar")
        return

    # Split by MAD change; differences within the tolerance are rounding
    # noise and must not be reported as a degradation.
    successful_corrections = results_df[results_df['improvement'] >= -mad_tolerance]
    failed_corrections = results_df[results_df['improvement'] < -mad_tolerance]

    print("✅ CORRECCIONES EXITOSAS (mejora en MAD):")
    for _, row in successful_corrections.iterrows():
        print(f"   {row['filter']}: MAD mejoró {row['improvement']:.4f} mag, offset redujo {row['offset_reduction']:.4f} mag")

    if len(failed_corrections) > 0:
        print("\n⚠️  CORRECCIONES PROBLEMÁTICAS:")
        for _, row in failed_corrections.iterrows():
            print(f"   {row['filter']}: MAD empeoró {abs(row['improvement']):.4f} mag")

    # Global statistics
    avg_improvement = results_df['improvement'].mean()
    avg_offset_reduction = results_df['offset_reduction'].mean()

    print("\n📊 ESTADÍSTICAS GLOBALES:")
    print(f"   Mejora promedio en MAD: {avg_improvement:.4f} mag")
    print(f"   Reducción promedio de offset: {avg_offset_reduction:.4f} mag")

    if avg_improvement >= -mad_tolerance and avg_offset_reduction > 0:
        print("🎯 CONCLUSIÓN: Las correcciones simples SON EFECTIVAS")
        print("   Recomendación: Usar estas correcciones para análisis posteriores")
    else:
        print("🎯 CONCLUSIÓN: Las correcciones simples NO SON SUFICIENTES")
        print("   Recomendación: Considerar métodos más sofisticados o verificar calibración")

    # Filter -> correction lookup; setdefault keeps the first occurrence
    known_corrections = {}
    if {'splus_filter', 'simple_correction'} <= set(corrections_df.columns):
        for name, value in zip(corrections_df['splus_filter'], corrections_df['simple_correction']):
            known_corrections.setdefault(name, value)

    # Final equations
    print("\n🧮 ECUACIONES DE CORRECCIÓN RECOMENDADAS:")
    for _, row in results_df.iterrows():
        name = row['filter']
        if name in known_corrections:
            correction = known_corrections[name]
        elif 'applied_correction' in row.index:
            # Fall back to the value recorded during the evaluation
            correction = row['applied_correction']
        else:
            print(f"   {name}: corrección no disponible")
            continue
        print(f"   {name}_corrected = {name} + {correction:.4f}")

def main(taylor_file='../anac_data/Results/all_fields_gc_photometry_identical.csv'):
    """
    Run the whole calibration pipeline on one photometry table.

    Steps: load the CSV, analyse the current SPLUS-vs-Taylor offsets, derive
    the simple corrections, apply them, evaluate them and print the final
    recommendations. Intermediate tables are written as CSV files in the
    current working directory. The closing summary lists only the files that
    were actually produced during this run.

    Parameters
    ----------
    taylor_file : str, optional
        Path of the input CSV. Defaults to the location used by the original
        analysis.

    Returns
    -------
    None
        Returns early, after printing a message, when the file cannot be
        loaded or when no offsets could be analysed.
    """
    print("🚀 CALIBRACIÓN ESPECÍFICA PARA CÚMULOS GLOBULARES")
    print("="*70)

    # Load the data; any failure is reported and ends the run gracefully
    try:
        taylor_df = pd.read_csv(taylor_file)
    except Exception as e:
        print(f"❌ Error cargando datos: {e}")
        return
    print(f"✅ Cargados {len(taylor_df)} cúmulos globulares")

    # Summary lines of the outputs written so far
    generated = []

    # 1. Analyse the current offsets (also writes the diagnostic figure)
    offsets_df = analyze_current_offsets(taylor_df)

    if offsets_df.empty:
        print("❌ No se pudieron analizar offsets")
        return
    generated.append("✅ current_offsets_analysis.png - Análisis visual de offsets")

    # 2. Derive the simple corrections
    corrections_df = derive_simple_gc_corrections(offsets_df)
    corrections_df.to_csv('gc_simple_corrections.csv', index=False)
    print("✅ Correcciones guardadas en: gc_simple_corrections.csv")
    generated.append("✅ gc_simple_corrections.csv - Correcciones derivadas")

    # 3. Apply the corrections
    corrected_df = apply_simple_gc_corrections(taylor_df, corrections_df)
    corrected_df.to_csv('taylor_gc_simple_corrected.csv', index=False)
    print("✅ Datos corregidos guardados en: taylor_gc_simple_corrected.csv")
    generated.append("✅ taylor_gc_simple_corrected.csv - Datos corregidos")

    # 4. Evaluate the results (the evaluation figure is always written)
    results_df = evaluate_simple_corrections(taylor_df, corrected_df, corrections_df)
    generated.append("✅ simple_corrections_evaluation.png - Evaluación visual")

    if not results_df.empty:
        results_df.to_csv('gc_simple_correction_results.csv', index=False)
        print("✅ Resultados de evaluación guardados en: gc_simple_correction_results.csv")
        generated.append("✅ gc_simple_correction_results.csv - Resultados estadísticos")

        # 5. Show the recommendations
        create_final_recommendations(results_df, corrections_df)
    else:
        print("❌ No se pudieron evaluar las correcciones")

    print("\n" + "="*70)
    print("ANÁLISIS COMPLETADO")
    print("="*70)
    print("ARCHIVOS GENERADOS:")
    for line in generated:
        print(line)

# Run the calibration pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

🚀 CALIBRACIÓN ESPECÍFICA PARA CÚMULOS GLOBULARES
✅ Cargados 181 cúmulos globulares
📊 ANALIZANDO OFFSETS ACTUALES SPLUS vs TAYLOR

📋 RESUMEN DE OFFSETS ACTUALES:
splus_filter taylor_filter  n_sources  median_offset  mean_offset  std_offset  mad_offset taylor_range splus_range
    MAG_F378          umag        155      -0.732993    -0.572679    0.885163    0.379215    19.9-24.2   19.3-26.0
    MAG_F395          umag        150      -0.933365    -0.777624    1.124301    0.464626    19.9-24.1   19.2-28.1
    MAG_F410          gmag        155       0.350401     0.467641    0.678825    0.271231    18.5-22.3   18.8-26.0
    MAG_F430          gmag        163       0.275781     0.465629    1.108700    0.257459    18.5-22.4   18.8-28.6
    MAG_F515          gmag        169      -0.354073    -0.149681    1.142018    0.148569    18.5-22.4   18.2-29.3
    MAG_F660          rmag        166      -0.240070    -0.221061    0.685779    0.113005    18.2-21.4   12.4-22.6
    MAG_F861          imag        