In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from astropy.modeling import models, fitting

# Configuración de estilo para los gráficos
plt.style.use('default')
sns.set_palette("colorblind")

def analyze_photometry_coherence(df, aperture=3, mag_min=10, mag_max=90):
    """
    Compare S-PLUS aperture magnitudes with the Taylor catalogue magnitudes.

    For each (S-PLUS filter, Taylor band) pair in ``filter_correspondences``
    the rows of ``df`` whose two magnitudes are finite and strictly inside
    ``(mag_min, mag_max)`` are selected. On that sample the function computes
    difference statistics (S-PLUS minus Taylor), the Pearson correlation and
    an ordinary least-squares straight-line fit, and draws one scatter panel.
    The multi-panel figure is saved as a PNG under ``../anac_data/``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the ``MAG_<filter>_<aperture>`` columns and the Taylor
        magnitude columns. A pair with a missing column is skipped with a
        printed warning.
    aperture : int, default 3
        Suffix used to build the S-PLUS column name (``MAG_F378_3`` etc.).
    mag_min, mag_max : float, default 10 and 90
        Exclusive bounds applied to both magnitudes. The default upper bound
        drops placeholder values such as 99.0.

    Returns
    -------
    dict
        Keyed by S-PLUS filter name. Each value is a dict with the keys
        ``taylor_filter``, ``slope``, ``intercept``, ``correlation``,
        ``mean_diff``, ``median_diff``, ``std_diff``, ``mad_diff``,
        ``n_sources`` and ``aperture``. Skipped pairs have no entry.
    """
    # Imported locally on purpose: notebook cells loop with a variable called
    # `stats`, which rebinds the module-level `scipy.stats` name and would
    # otherwise make `stats.pearsonr` fail on later calls.
    from scipy.stats import pearsonr

    # (S-PLUS column prefix, Taylor column, panel title).
    # The last title now names the z band, because `zmag` is the column that
    # is actually compared (it previously said "i-band").
    filter_correspondences = [
        ('MAG_F378', 'umag', 'F378 vs u-band'),
        ('MAG_F395', 'umag', 'F395 vs u-band (aproximado)'),
        ('MAG_F410', 'gmag', 'F410 vs g-band (aproximado)'),
        ('MAG_F430', 'gmag', 'F430 vs g-band (aproximado)'),
        ('MAG_F515', 'gmag', 'F515 vs g-band'),
        ('MAG_F660', 'rmag', 'F660 vs r-band'),
        ('MAG_F861', 'zmag', 'F861 vs z-band')
    ]

    results = {}
    n_cols = 3
    n_rows = (len(filter_correspondences) + n_cols - 1) // n_cols  # ceil division

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5*n_rows))
    axes = np.atleast_1d(axes).ravel()
    used_axes = set()  # indices of the panels that actually received data

    for i, (splus_filter, taylor_filter, title) in enumerate(filter_correspondences):
        splus_col = f'{splus_filter}_{aperture}'

        # Skip pairs whose columns are absent from the table.
        if splus_col not in df.columns or taylor_filter not in df.columns:
            print(f"Advertencia: {splus_col} o {taylor_filter} no encontrados. Saltando.")
            continue

        splus_all = df[splus_col]
        taylor_all = df[taylor_filter]

        # Keep finite magnitudes strictly inside (mag_min, mag_max).
        # np.isfinite is False for NaN, so no separate notna() test is needed.
        valid_mask = (
            np.isfinite(splus_all) &
            np.isfinite(taylor_all) &
            (splus_all > mag_min) & (splus_all < mag_max) &
            (taylor_all > mag_min) & (taylor_all < mag_max)
        )

        splus_mags = splus_all[valid_mask]
        taylor_mags = taylor_all[valid_mask]

        if len(splus_mags) < 10:
            print(f"No hay suficientes datos válidos para {splus_col} vs {taylor_filter} (N={len(splus_mags)}). Saltando.")
            continue

        # Magnitude differences, S-PLUS minus Taylor.
        differences = splus_mags - taylor_mags

        mean_diff = np.mean(differences)
        median_diff = np.median(differences)
        std_diff = np.std(differences)
        mad_diff = np.median(np.abs(differences - median_diff))  # median absolute deviation
        correlation = pearsonr(taylor_mags, splus_mags)[0]

        # Ordinary (non-robust) least-squares straight line: splus = slope*taylor + intercept.
        # On failure, report it and fall back to the identity line so the panel is still drawn.
        try:
            init_model = models.Linear1D(slope=1, intercept=0)
            fitter = fitting.LinearLSQFitter()
            fitted_model = fitter(init_model, taylor_mags.to_numpy(), splus_mags.to_numpy())
            slope, intercept = float(fitted_model.slope.value), float(fitted_model.intercept.value)
        except Exception as e:
            print(f"Warning fitting linear model for {splus_col}: {e}")
            slope, intercept = 1.0, 0.0

        results[splus_filter] = {
            'taylor_filter': taylor_filter,
            'slope': slope,
            'intercept': intercept,
            'correlation': correlation,
            'mean_diff': mean_diff,
            'median_diff': median_diff,
            'std_diff': std_diff,
            'mad_diff': mad_diff,
            'n_sources': len(splus_mags),
            'aperture': aperture
        }

        ax = axes[i]
        used_axes.add(i)

        # Points coloured by the difference; colour limits clipped to the
        # 2nd-98th percentiles so a few extreme values do not wash out the map.
        sc = ax.scatter(taylor_mags, splus_mags, alpha=0.6, s=15, c=differences,
                        cmap='coolwarm',
                        vmin=np.percentile(differences, 2),
                        vmax=np.percentile(differences, 98))

        # 1:1 reference line and fitted line over the observed Taylor range.
        x_range = np.linspace(taylor_mags.min(), taylor_mags.max(), 200)
        ax.plot(x_range, x_range, 'k--', alpha=0.7, label='1:1')
        ax.plot(x_range, slope*x_range + intercept, 'r-',
                label=f'y = {slope:.3f}x + {intercept:.3f}')

        ax.set_xlabel(f'Taylor {taylor_filter}')
        ax.set_ylabel(f'SPLUS {splus_filter} (aper={aperture})')
        ax.set_title(f'{title}\nr = {correlation:.3f}, Δ_med = {median_diff:.3f} ± MAD {mad_diff:.3f}')
        ax.legend(loc='best', fontsize='small')
        ax.grid(True, alpha=0.3)

        # One colour bar per panel.
        cbar = fig.colorbar(sc, ax=ax)
        cbar.set_label('Diferencia (SPLUS - Taylor)')

    # Hide every panel that received no data: the spare trailing axes and
    # also the slots of filters that were skipped in the middle of the loop.
    for j, unused_ax in enumerate(axes):
        if j not in used_axes:
            unused_ax.set_visible(False)

    fig.tight_layout()
    out_png = '../anac_data/splus_taylor_coherence_analysis_aper{}.png'.format(aperture)
    fig.savefig(out_png, dpi=300, bbox_inches='tight')
    plt.close(fig)  # close this figure explicitly rather than "the current one"
    print(f"Figura guardada en: {out_png}")

    return results

def generate_statistical_summary(results, out_csv='../anac_data/splus_taylor_coherence_summary.csv'):
    """
    Print a per-filter summary table of the comparison and write it to CSV.

    Parameters
    ----------
    results : dict
        Mapping of S-PLUS filter name to a dict with the keys
        ``taylor_filter``, ``n_sources``, ``correlation``, ``slope``,
        ``intercept``, ``mean_diff``, ``median_diff``, ``std_diff`` and
        ``mad_diff``; ``aperture`` is optional (blank when absent).
    out_csv : str
        Destination path of the CSV file.

    Notes
    -----
    Floating-point statistics are stored as strings formatted with three
    decimals, both in the printed table and in the CSV. Returns ``None``.
    """
    banner = "=" * 60
    print(banner)
    print("ANÁLISIS DE COHERENCIA: FOTOMETRÍA SPLUS vs TAYLOR")
    print(banner)

    def _three_decimals(value):
        # Fixed three-decimal text representation used for every statistic.
        return f"{value:.3f}"

    rows = [
        {
            'Filtro SPLUS': filter_name,
            'Filtro Taylor': entry['taylor_filter'],
            'Aperture': entry.get('aperture', ''),
            'N': entry['n_sources'],
            'Correlación': _three_decimals(entry['correlation']),
            'Pendiente': _three_decimals(entry['slope']),
            'Intercepto': _three_decimals(entry['intercept']),
            'Δ media': _three_decimals(entry['mean_diff']),
            'Δ mediana': _three_decimals(entry['median_diff']),
            'σ': _three_decimals(entry['std_diff']),
            'MAD': _three_decimals(entry['mad_diff']),
        }
        for filter_name, entry in results.items()
    ]

    summary_df = pd.DataFrame(rows)
    print(summary_df.to_string(index=False))

    # Persist the same table that was printed.
    summary_df.to_csv(out_csv, index=False)
    print(f"\nResumen guardado en '{out_csv}'")

# --------------------
# EJECUCIÓN
# --------------------
# Load the photometry table that is compared against the Taylor magnitudes.
df = pd.read_csv('../anac_data/Results/all_fields_gc_photometry_corrected_errors_v17.csv')

# Coherence analysis using APERTURE = 3.
ap = 3
results = analyze_photometry_coherence(df, aperture=ap)

# Print the statistical summary and save it to CSV.
generate_statistical_summary(results, out_csv=f'../anac_data/splus_taylor_coherence_summary_aper{ap}.csv')

# Additional analysis: distribution of the differences per filter (same aperture).
# The magnitude limits mirror the defaults of analyze_photometry_coherence.
mag_min, mag_max = 10, 90

fig_hist, ax_hist = plt.subplots(figsize=(12, 8))
# The loop variable is deliberately NOT called `stats`: that name is the
# scipy.stats module at notebook level, and rebinding it here would break
# every later call that relies on `stats.pearsonr`.
for splus_filter, filter_stats in results.items():
    splus_col = f'{splus_filter}_{ap}'
    taylor_col = filter_stats['taylor_filter']
    splus_vals = df[splus_col]
    taylor_vals = df[taylor_col]
    # Finite magnitudes strictly inside (mag_min, mag_max); NaN fails np.isfinite.
    valid_mask = (
        np.isfinite(splus_vals) &
        np.isfinite(taylor_vals) &
        (splus_vals > mag_min) & (splus_vals < mag_max) &
        (taylor_vals > mag_min) & (taylor_vals < mag_max)
    )
    differences = splus_vals[valid_mask] - taylor_vals[valid_mask]
    if len(differences) == 0:
        continue
    ax_hist.hist(differences, bins=30, alpha=0.5, label=f'{splus_filter} - {taylor_col} (N={len(differences)}) mean={np.mean(differences):.3f}')
ax_hist.set_xlabel('Diferencia (SPLUS - Taylor)')
ax_hist.set_ylabel('Frecuencia')
ax_hist.set_title(f'Distribución de diferencias entre fotometría SPLUS y Taylor (aper={ap})')
ax_hist.legend()
ax_hist.grid(True, alpha=0.3)
out_hist = f'../anac_data/splus_taylor_differences_distribution_aper{ap}.png'
fig_hist.savefig(out_hist, dpi=300, bbox_inches='tight')
plt.close(fig_hist)
print(f"Histograma guardado en: {out_hist}")


Figura guardada en: ../anac_data/splus_taylor_coherence_analysis_aper3.png
ANÁLISIS DE COHERENCIA: FOTOMETRÍA SPLUS vs TAYLOR
Filtro SPLUS Filtro Taylor  Aperture   N Correlación Pendiente Intercepto Δ media Δ mediana     σ   MAD
    MAG_F378          umag         3 147       0.654     0.836      2.741  -0.950    -1.022 0.910 0.466
    MAG_F395          umag         3 138       0.572     0.616      7.300  -1.346    -1.360 0.904 0.612
    MAG_F410          gmag         3 158       0.744     0.946      1.262   0.137     0.062 0.719 0.372
    MAG_F430          gmag         3 159       0.812     0.992      0.155  -0.007    -0.035 0.607 0.290
    MAG_F515          gmag         3 168       0.901     1.004     -0.711  -0.622    -0.664 0.409 0.174
    MAG_F660          rmag         3 168       0.978     0.991     -0.243  -0.430    -0.441 0.165 0.084
    MAG_F861          zmag         3 169       0.917     1.022     -0.724  -0.301    -0.353 0.329 0.153

Resumen guardado en '../anac_data/splus_t

## Exploring the outliers

In [3]:
# Preview the first five rows of the loaded table.
df.head(n=5)

Unnamed: 0,recno,T17ID,oldID,RAJ2000,DEJ2000,Prob,Rgc,PA,umag,gmag,...,MAG_F861_3,MAGERR_F861_3,SNR_F861_3,AP_CORR_F861_3,FIELD,PROCESSING_DATE,PHOTOMETRY_METHOD,ERROR_METHOD,COHERENCE_MEDIAN_DIFF,COHERENCE_MAD
0,2507,T17-2507,,202.456042,-45.608483,1.0,168.59,148.94,23.0,21.01,...,99.0,99.0,0.0,0.326835,CenA01,2025-10-10 12:06:33,S-PLUS_v17_CORRECTED_ERRORS,WEIGHT_MAP_PROPAGATION_CORRECTED,-0.604651,0.429599
1,2502,T17-2502,,202.45,-45.563867,1.0,165.98,148.66,23.14,21.1,...,99.0,99.0,0.0,0.326835,CenA01,2025-10-10 12:06:33,S-PLUS_v17_CORRECTED_ERRORS,WEIGHT_MAP_PROPAGATION_CORRECTED,-0.604651,0.429599
2,2421,T17-2421,,202.283083,-45.413186,1.0,153.84,151.35,23.16,21.37,...,20.034883,0.402306,2.69878,0.326835,CenA01,2025-10-10 12:06:33,S-PLUS_v17_CORRECTED_ERRORS,WEIGHT_MAP_PROPAGATION_CORRECTED,-0.604651,0.429599
3,2433,T17-2433,,202.301625,-45.407514,1.0,153.93,150.81,23.21,21.43,...,21.586106,0.542868,2.0,0.326835,CenA01,2025-10-10 12:06:33,S-PLUS_v17_CORRECTED_ERRORS,WEIGHT_MAP_PROPAGATION_CORRECTED,-0.604651,0.429599
4,2515,T17-2515,,202.489167,-45.511169,1.0,164.03,147.23,23.04,21.26,...,20.009224,0.33052,3.284935,0.326835,CenA01,2025-10-10 12:06:33,S-PLUS_v17_CORRECTED_ERRORS,WEIGHT_MAP_PROPAGATION_CORRECTED,-0.604651,0.429599


In [3]:
# Show every column name as a plain Python list.
list(df.columns)

['recno',
 'T17ID',
 'oldID',
 'RAJ2000',
 'DEJ2000',
 'Prob',
 'Rgc',
 'PA',
 'umag',
 'gmag',
 'rmag',
 'imag',
 'zmag',
 'e_umag',
 's_umag',
 'e_gmag',
 's_gmag',
 'e_rmag',
 's_rmag',
 'e_imag',
 's_imag',
 'e_zmag',
 's_zmag',
 'FLUX_F378_2',
 'FLUXERR_F378_2',
 'MAG_F378_2',
 'MAGERR_F378_2',
 'SNR_F378_2',
 'AP_CORR_F378_2',
 'FLUX_F378_3',
 'FLUXERR_F378_3',
 'MAG_F378_3',
 'MAGERR_F378_3',
 'SNR_F378_3',
 'AP_CORR_F378_3',
 'FLUX_F395_2',
 'FLUXERR_F395_2',
 'MAG_F395_2',
 'MAGERR_F395_2',
 'SNR_F395_2',
 'AP_CORR_F395_2',
 'FLUX_F395_3',
 'FLUXERR_F395_3',
 'MAG_F395_3',
 'MAGERR_F395_3',
 'SNR_F395_3',
 'AP_CORR_F395_3',
 'FLUX_F410_2',
 'FLUXERR_F410_2',
 'MAG_F410_2',
 'MAGERR_F410_2',
 'SNR_F410_2',
 'AP_CORR_F410_2',
 'FLUX_F410_3',
 'FLUXERR_F410_3',
 'MAG_F410_3',
 'MAGERR_F410_3',
 'SNR_F410_3',
 'AP_CORR_F410_3',
 'FLUX_F430_2',
 'FLUXERR_F430_2',
 'MAG_F430_2',
 'MAGERR_F430_2',
 'SNR_F430_2',
 'AP_CORR_F430_2',
 'FLUX_F430_3',
 'FLUXERR_F430_3',
 'MAG_F430_3',
 'M

In [4]:
def analyze_photometry_coherence_v2(df, aperture=3, mag_min=10, mag_max=90):
    """
    Compare S-PLUS aperture magnitudes with the Taylor catalogue magnitudes
    and save the figure with a ``_clean`` suffix.

    For each (S-PLUS filter, Taylor band) pair in ``filter_correspondences``
    the rows of ``df`` whose two magnitudes are finite and strictly inside
    ``(mag_min, mag_max)`` are selected. On that sample the function computes
    difference statistics (S-PLUS minus Taylor), the Pearson correlation and
    an ordinary least-squares straight-line fit, and draws one scatter panel.
    The multi-panel figure is saved as
    ``../anac_data/splus_taylor_coherence_analysis_aper<aperture>_clean.png``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the ``MAG_<filter>_<aperture>`` columns and the Taylor
        magnitude columns. A pair with a missing column is skipped with a
        printed warning.
    aperture : int, default 3
        Suffix used to build the S-PLUS column name (``MAG_F378_3`` etc.).
    mag_min, mag_max : float, default 10 and 90
        Exclusive bounds applied to both magnitudes. The default upper bound
        drops placeholder values such as 99.0.

    Returns
    -------
    dict
        Keyed by S-PLUS filter name. Each value is a dict with the keys
        ``taylor_filter``, ``slope``, ``intercept``, ``correlation``,
        ``mean_diff``, ``median_diff``, ``std_diff``, ``mad_diff``,
        ``n_sources`` and ``aperture``. Skipped pairs have no entry.
    """
    # Imported locally on purpose: notebook cells loop with a variable called
    # `stats`, which rebinds the module-level `scipy.stats` name and would
    # otherwise make `stats.pearsonr` fail on later calls.
    from scipy.stats import pearsonr

    # (S-PLUS column prefix, Taylor column, panel title).
    # The last title now names the z band, because `zmag` is the column that
    # is actually compared (it previously said "i-band").
    filter_correspondences = [
        ('MAG_F378', 'umag', 'F378 vs u-band'),
        ('MAG_F395', 'umag', 'F395 vs u-band (aproximado)'),
        ('MAG_F410', 'gmag', 'F410 vs g-band (aproximado)'),
        ('MAG_F430', 'gmag', 'F430 vs g-band (aproximado)'),
        ('MAG_F515', 'gmag', 'F515 vs g-band'),
        ('MAG_F660', 'rmag', 'F660 vs r-band'),
        ('MAG_F861', 'zmag', 'F861 vs z-band')
    ]

    results = {}
    n_cols = 3
    n_rows = (len(filter_correspondences) + n_cols - 1) // n_cols  # ceil division

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5*n_rows))
    axes = np.atleast_1d(axes).ravel()
    used_axes = set()  # indices of the panels that actually received data

    for i, (splus_filter, taylor_filter, title) in enumerate(filter_correspondences):
        splus_col = f'{splus_filter}_{aperture}'

        # Skip pairs whose columns are absent from the table.
        if splus_col not in df.columns or taylor_filter not in df.columns:
            print(f"Advertencia: {splus_col} o {taylor_filter} no encontrados. Saltando.")
            continue

        splus_all = df[splus_col]
        taylor_all = df[taylor_filter]

        # Keep finite magnitudes strictly inside (mag_min, mag_max).
        # np.isfinite is False for NaN, so no separate notna() test is needed.
        valid_mask = (
            np.isfinite(splus_all) &
            np.isfinite(taylor_all) &
            (splus_all > mag_min) & (splus_all < mag_max) &
            (taylor_all > mag_min) & (taylor_all < mag_max)
        )

        splus_mags = splus_all[valid_mask]
        taylor_mags = taylor_all[valid_mask]

        if len(splus_mags) < 10:
            print(f"No hay suficientes datos válidos para {splus_col} vs {taylor_filter} (N={len(splus_mags)}). Saltando.")
            continue

        # Magnitude differences, S-PLUS minus Taylor.
        differences = splus_mags - taylor_mags

        mean_diff = np.mean(differences)
        median_diff = np.median(differences)
        std_diff = np.std(differences)
        mad_diff = np.median(np.abs(differences - median_diff))  # median absolute deviation
        correlation = pearsonr(taylor_mags, splus_mags)[0]

        # Ordinary (non-robust) least-squares straight line: splus = slope*taylor + intercept.
        # On failure, report it and fall back to the identity line so the panel is still drawn.
        try:
            init_model = models.Linear1D(slope=1, intercept=0)
            fitter = fitting.LinearLSQFitter()
            fitted_model = fitter(init_model, taylor_mags.to_numpy(), splus_mags.to_numpy())
            slope, intercept = float(fitted_model.slope.value), float(fitted_model.intercept.value)
        except Exception as e:
            print(f"Warning fitting linear model for {splus_col}: {e}")
            slope, intercept = 1.0, 0.0

        results[splus_filter] = {
            'taylor_filter': taylor_filter,
            'slope': slope,
            'intercept': intercept,
            'correlation': correlation,
            'mean_diff': mean_diff,
            'median_diff': median_diff,
            'std_diff': std_diff,
            'mad_diff': mad_diff,
            'n_sources': len(splus_mags),
            'aperture': aperture
        }

        ax = axes[i]
        used_axes.add(i)

        # Points coloured by the difference; colour limits clipped to the
        # 2nd-98th percentiles so a few extreme values do not wash out the map.
        sc = ax.scatter(taylor_mags, splus_mags, alpha=0.6, s=15, c=differences,
                        cmap='coolwarm',
                        vmin=np.percentile(differences, 2),
                        vmax=np.percentile(differences, 98))

        # 1:1 reference line and fitted line over the observed Taylor range.
        x_range = np.linspace(taylor_mags.min(), taylor_mags.max(), 200)
        ax.plot(x_range, x_range, 'k--', alpha=0.7, label='1:1')
        ax.plot(x_range, slope*x_range + intercept, 'r-',
                label=f'y = {slope:.3f}x + {intercept:.3f}')

        ax.set_xlabel(f'Taylor {taylor_filter}')
        ax.set_ylabel(f'SPLUS {splus_filter} (aper={aperture})')
        ax.set_title(f'{title}\nr = {correlation:.3f}, Δ_med = {median_diff:.3f} ± MAD {mad_diff:.3f}')
        ax.legend(loc='best', fontsize='small')
        ax.grid(True, alpha=0.3)

        # One colour bar per panel.
        cbar = fig.colorbar(sc, ax=ax)
        cbar.set_label('Diferencia (SPLUS - Taylor)')

    # Hide every panel that received no data: the spare trailing axes and
    # also the slots of filters that were skipped in the middle of the loop.
    for j, unused_ax in enumerate(axes):
        if j not in used_axes:
            unused_ax.set_visible(False)

    fig.tight_layout()
    out_png = '../anac_data/splus_taylor_coherence_analysis_aper{}_clean.png'.format(aperture)
    fig.savefig(out_png, dpi=300, bbox_inches='tight')
    plt.close(fig)  # close this figure explicitly rather than "the current one"
    print(f"Figura guardada en: {out_png}")

    return results

In [5]:
# Rows whose MAG_F660_2 value is 25.5 or larger are treated as outliers.
df_outliers = df.loc[df["MAG_F660_2"].ge(25.5)]

In [6]:
# Distinct FIELD values that contain at least one outlier row.
pd.unique(df_outliers["FIELD"])

array(['CenA01', 'CenA02', 'CenA03', 'CenA06', 'CenA07', 'CenA09',
       'CenA10', 'CenA11', 'CenA12', 'CenA13', 'CenA16', 'CenA17',
       'CenA18', 'CenA19', 'CenA20', 'CenA21', 'CenA22', 'CenA23'],
      dtype=object)

In [7]:
# Number of outlier rows.
df_outliers.shape[0]

479

## Removing these outliers

In [8]:
# Keep rows with both MAG_F515_2 and MAG_F861_2 at or below 22.
# NOTE(review): the outliers above were selected on MAG_F660_2 >= 25.5, while
# this cut uses F515/F861 <= 22 — confirm that this is the intended criterion.
df_clean = df.loc[df["MAG_F515_2"].le(22) & df["MAG_F861_2"].le(22)]

In [9]:
# Number of rows that survive the cleaning cut.
df_clean.shape[0]

2742

In [10]:
# Re-import so that `stats` is guaranteed to be the scipy module when the
# analysis function runs (a notebook-level loop variable named `stats`
# would otherwise shadow it).
from scipy import stats

# Coherence analysis on the cleaned sample using APERTURE = 2.
# Set explicitly: the only earlier assignment is `ap = 3`, whereas this step
# (and the figure it saved, "..._aper2_clean.png") uses the aperture-2 columns
# that df_clean was selected on.
ap = 2
results1 = analyze_photometry_coherence_v2(df_clean, aperture=ap)



Figura guardada en: ../anac_data/splus_taylor_coherence_analysis_aper2_clean.png


In [11]:
# Additional analysis: distribution of the differences per filter on the
# cleaned sample (same aperture as the analysis above).
# The magnitude limits mirror the defaults of analyze_photometry_coherence_v2.
mag_min, mag_max = 10, 90

fig_hist, ax_hist = plt.subplots(figsize=(12, 8))
# The loop variable is deliberately NOT called `stats`, to avoid rebinding
# the notebook-level scipy.stats module.
for splus_filter, filter_stats in results1.items():
    splus_col = f'{splus_filter}_{ap}'
    taylor_col = filter_stats['taylor_filter']
    splus_vals = df_clean[splus_col]
    taylor_vals = df_clean[taylor_col]
    # Build the whole mask from df_clean only; mixing in columns of the full
    # `df` forces an index alignment between frames of different length.
    valid_mask = (
        np.isfinite(splus_vals) &
        np.isfinite(taylor_vals) &
        (splus_vals > mag_min) & (splus_vals < mag_max) &
        (taylor_vals > mag_min) & (taylor_vals < mag_max)
    )
    differences1 = splus_vals[valid_mask] - taylor_vals[valid_mask]
    # Test the series computed here, not a `differences` variable left over
    # from an earlier cell.
    if len(differences1) == 0:
        continue
    ax_hist.hist(differences1, bins=30, alpha=0.5, label=f'{splus_filter} - {taylor_col} (N={len(differences1)}) mean={np.mean(differences1):.3f}')
ax_hist.set_xlabel('Diferencia (SPLUS - Taylor)')
ax_hist.set_ylabel('Frecuencia')
ax_hist.set_title(f'Distribución de diferencias entre fotometría SPLUS y Taylor (aper={ap})')
ax_hist.legend()
ax_hist.grid(True, alpha=0.3)
out_hist = f'../anac_data/splus_taylor_differences_distribution_aper{ap}_clean.png'
fig_hist.savefig(out_hist, dpi=300, bbox_inches='tight')
plt.close(fig_hist)
print(f"Histograma guardado en: {out_hist}")


Histograma guardado en: ../anac_data/splus_taylor_differences_distribution_aper2_clean.png


### Validating the 2 arcsec aperture