# Open Clusters Analytics - 03_Membership analysis of M37 - DBSCAN

In [None]:
# ========================================================
# Membership Analysis for M37 Open Cluster
# ========================================================
# This notebook performs the membership analysis of M37 (NGC 2099)
# using data from Gaia DR3 and applying the DBSCAN algorithm

# Import necessary libraries

import os
import warnings

import astropy.units as u
import hdbscan
import matplotlib.pyplot as plt
import numpy as np
from astropy.coordinates import SkyCoord
from astroquery.gaia import Gaia
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')

# Configuration of visuals
from matplotlib.patches import Ellipse
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'.
# Older releases only know the un-prefixed name, so fall back to it when the
# new name is not registered instead of failing on the style lookup.
plot_style = 'seaborn-v0_8-darkgrid'
if plot_style not in plt.style.available:
    plot_style = 'seaborn-darkgrid'
plt.style.use(plot_style)

# Default figure size (inches) for figures that do not pass their own figsize.
plt.rcParams['figure.figsize'] = (12, 8)


# =============================================================================
# 1: DOWNLOAD DATA FROM GAIA DR3
# =============================================================================

# Section banner for the notebook output.
for banner_line in ("="*70, "1: DOWNLOAD DATA FROM GAIA DR3", "="*70):
    print(banner_line)

# Target definition. The centre (RA, Dec) and the cone radius are in degrees;
# all three are interpolated into the ADQL CIRCLE(...) constraint below.
cluster_name = "NGC 2099"
cluster_ra, cluster_dec = 88.074, 32.545
search_radius = 1.5

print(f"\nAnalyzing cluster: {cluster_name}")
print(f"Coordinates: RA={cluster_ra}°, Dec={cluster_dec}°")
print(f"Search Radius: {search_radius}°")

# ADQL (Astronomical Data Query Language) cone search on gaiadr3.gaia_source.
# Quality cuts applied server-side: non-null parallax and proper motions,
# parallax/parallax_error > 5, ruwe < 1.4 and G < 20. Rows come back sorted
# by G magnitude (brightest first).
query = f"""
SELECT 
    ra, dec, pmra, pmdec, parallax,
    phot_g_mean_mag, phot_bp_mean_mag, phot_rp_mean_mag,
    ruwe, astrometric_excess_noise
FROM gaiadr3.gaia_source
WHERE 1=CONTAINS(
    POINT('ICRS', ra, dec),
    CIRCLE('ICRS', {cluster_ra}, {cluster_dec}, {search_radius})
)
AND parallax IS NOT NULL
AND parallax/parallax_error > 5
AND pmra IS NOT NULL
AND pmdec IS NOT NULL
AND ruwe < 1.4
AND phot_g_mean_mag < 20
ORDER BY phot_g_mean_mag
"""
# Gaia archive credentials are read from the environment rather than being
# written into the notebook, so they never end up in version control.
# Set GAIA_USERNAME and GAIA_PASSWORD before running to enable the
# authenticated (asynchronous) download path; leave them unset to run
# anonymously.
gaia_username = os.environ.get("GAIA_USERNAME", "")
gaia_password = os.environ.get("GAIA_PASSWORD", "")
authenticated = False

if gaia_username and gaia_password:
    try:
        print("\n  Attempting authentication on Gaia...")
        Gaia.login(user=gaia_username, password=gaia_password)
        authenticated = True
        print("   Successful login to Gaia (unlimited queries)")
    except Exception as e:
        # Login is best-effort: any failure falls back to anonymous access.
        print(f"  Login failed: {e}")
        print("  Continuing without authentication (limit ~2000 rows)")
else:
    print("\n  No Gaia credentials found in GAIA_USERNAME / GAIA_PASSWORD")
    print("  Continuing without authentication (limit ~2000 rows)")


# Run the ADQL query and keep the resulting table in `data`.
# The original message used "\D", an invalid escape sequence that printed a
# literal backslash; a leading newline was intended.
print("\nDownloading data from Gaia DR3...")

try:
    if authenticated:
        # Asynchronous query (used once the login above succeeded)
        print("\n Running asynchronous query (may take 1-2 minutes)...")
        job = Gaia.launch_job_async(query)
        data = job.get_results()
        print(f"  Download complete (async): {len(data)} stars")
    else:
        # Synchronous query (anonymous, limited to 2000 rows)
        print("\n Running synchronous query...")
        job = Gaia.launch_job(query)
        data = job.get_results()
        print(f"  Download complete (sync): {len(data)} stars")

except Exception as e:
    # Print troubleshooting hints, then re-raise so the notebook stops here
    # instead of continuing without `data`.
    print(f"\n Error executing query in Gaia: {e}")
    print("\nPossible causes:")
    print("  - Query too large (>2000 rows without authentication)")
    print("  - Server timeout")
    print("  - Incorrect ADQL syntax")
    raise

print("\nData successfully downloaded")
print(f"Number of stars: {len(data)}")

# Materialise the columns used throughout the notebook as plain NumPy arrays.
ra, dec, pmra, pmdec, parallax, gmag = (
    np.array(data[column])
    for column in ('ra', 'dec', 'pmra', 'pmdec', 'parallax', 'phot_g_mean_mag')
)

# Four-panel overview of the downloaded sample: sky positions, proper
# motions, parallax histogram and G-magnitude histogram.
fig, axes = plt.subplots(2, 2, figsize=(14, 12))
(sky_panel, pm_panel), (plx_panel, mag_panel) = axes

sky_panel.scatter(ra, dec, s=1, alpha=0.5, c='gray')
sky_panel.set(xlabel='RA (degrees)',
              ylabel='Dec (degrees)',
              title='Spatial distribution of stars')

pm_panel.scatter(pmra, pmdec, s=1, alpha=0.5, c='gray')
pm_panel.set(xlabel='μ_α* (mas/yr)',
             ylabel='μ_δ (mas/yr)',
             title='Proper motion diagram')

plx_panel.hist(parallax, bins=50, alpha=0.7, color='skyblue', edgecolor='black')
plx_panel.set(xlabel='Parallax (mas)',
              ylabel='Number of stars',
              title='Ditribution of Parallax')

mag_panel.hist(gmag, bins=50, alpha=0.7, color='salmon', edgecolor='black')
mag_panel.set(xlabel='G Magnitude',
              ylabel='Number of stars',
              title='Distribution of Magnitudes ')

plt.tight_layout()
plt.savefig('initial_data.png', dpi=150, bbox_inches='tight')
plt.show()

# Mean and standard deviation of the astrometric quantities, whole sample.
print("\nBasics statistics of data:")
print(f"  Mean Parallax: {np.mean(parallax):.2f} ± {np.std(parallax):.2f} mas")
print(f"  Mean μ_α: {np.mean(pmra):.2f} ± {np.std(pmra):.2f} mas/yr")
print(f"  Mean μ_δ: {np.mean(pmdec):.2f} ± {np.std(pmdec):.2f} mas/yr")



In [None]:

# =============================================================================
# 1.5: PRE-FILTERING
# =============================================================================

print("\n" + "="*70)
print("1.5: VISUAL INSPECTION")
print("="*70)

# Six exploratory panels drawn before any clustering is attempted.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Visual Inspection Pre-Clustering', fontsize=16, fontweight='bold')
(pm_panel, density_panel, plx_pmra_panel), \
    (plx_pmdec_panel, pm_hist_panel, plx_hist_panel) = axes

# Panel 1: proper-motion scatter.
pm_panel.scatter(pmra, pmdec, s=1, alpha=0.3, c='gray')
pm_panel.set(xlabel='μ_α* (mas/yr)',
             ylabel='μ_δ (mas/yr)',
             title='Proper motions')
pm_panel.grid(True, alpha=0.3)

# Panel 2: the same plane as a 2-D histogram, with its colour bar.
h = density_panel.hist2d(pmra, pmdec, bins=50, cmap='hot')
density_panel.set(xlabel='μ_α* (mas/yr)',
                  ylabel='μ_δ (mas/yr)',
                  title='Density on Proper Motions')
plt.colorbar(h[3], ax=density_panel)

# Panel 3: parallax against the RA proper-motion component.
plx_pmra_panel.scatter(pmra, parallax, s=1, alpha=0.3, c='gray')
plx_pmra_panel.set(xlabel='μ_α* (mas/yr)',
                   ylabel='Parallax (mas)',
                   title='Correlation μ_α* vs Parallax')
plx_pmra_panel.grid(True, alpha=0.3)

# Panel 4: parallax against the Dec proper-motion component.
plx_pmdec_panel.scatter(pmdec, parallax, s=1, alpha=0.3, c='gray')
plx_pmdec_panel.set(xlabel='μ_δ (mas/yr)',
                    ylabel='Paralaje (mas)',
                    title='Correlation μ_δ vs Parallax')
plx_pmdec_panel.grid(True, alpha=0.3)

# Panel 5: overlaid 1-D histograms of both proper-motion components (log y).
pm_hist_panel.hist(pmra, bins=100, alpha=0.5, label='μ_α*', color='blue')
pm_hist_panel.hist(pmdec, bins=100, alpha=0.5, label='μ_δ', color='red')
pm_hist_panel.set(xlabel='Proper motion (mas/yr)',
                  ylabel='Frequency',
                  title='Distributions of Proper Motions')
pm_hist_panel.legend()
pm_hist_panel.set_yscale('log')

# Panel 6: parallax histogram (log y) with the expected cluster value marked.
plx_hist_panel.hist(parallax, bins=100, alpha=0.7, color='green', edgecolor='black')
plx_hist_panel.set(xlabel='Parallax (mas)',
                   ylabel='Frequency',
                   title='Distribution of Parallaxes')
plx_hist_panel.axvline(0.67, color='red', linestyle='--', linewidth=2,
                       label='Expected Parallax for NGC 2099 (~0.666 mas)')
plx_hist_panel.legend()
plx_hist_panel.set_yscale('log')

plt.tight_layout()
plt.savefig('visual_inspection_pre_clustering.png', dpi=150, bbox_inches='tight')
plt.show()

# Literature reference values, printed for comparison with the plots above.
print("\nStatistics for NGC 2099 (literature):")
print("  Expected Parallax: ~0.666 mas (distancia ~1500 pc)")
print("  Expected μ_α*: ~1.9 mas/yr")
print("  Expected μ_δ: ~-5.6 mas/yr")

# Acceptance windows (inclusive) that bracket the literature values printed
# above: parallax in mas, proper motions in mas/yr.
parallax_min, parallax_max = 0.50, 0.85
pmra_min, pmra_max = 0.7, 3.0
pmdec_min, pmdec_max = -7.0, -4.0

# A star passes the pre-filter only if it lies inside all three windows.
in_parallax_window = (parallax >= parallax_min) & (parallax <= parallax_max)
in_pmra_window = (pmra >= pmra_min) & (pmra <= pmra_max)
in_pmdec_window = (pmdec >= pmdec_min) & (pmdec <= pmdec_max)
mask_prefilter = in_parallax_window & in_pmra_window & in_pmdec_window

n_before = len(data)
n_after = np.sum(mask_prefilter)
print(f"\nResult of pre-filtering:")
print(f"  Stars before: {n_before}")
print(f"  Stars after: {n_after}")
print(f"  Reduction: {100*(1-n_after/n_before):.1f}%")

# Filtered copies of every per-star array, in the same order as the inputs.
ra_filt, dec_filt, pmra_filt, pmdec_filt, parallax_filt, gmag_filt = (
    values[mask_prefilter]
    for values in (ra, dec, pmra, pmdec, parallax, gmag)
)

# Before/after comparison of the pre-filter: the full sample in grey with the
# surviving stars over-plotted in blue, in proper-motion space and on the sky.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
fig.suptitle('Efect of Pre-filtering', fontsize=16, fontweight='bold')
pm_panel, sky_panel = axes

pm_panel.scatter(pmra, pmdec, s=1, alpha=0.2, c='gray', label='All')
pm_panel.scatter(pmra_filt, pmdec_filt, s=3, alpha=0.6, c='blue', label='Filtered')
pm_panel.set(xlabel='μ_α* (mas/yr)',
             ylabel='μ_δ (mas/yr)',
             title='Proper Motions')
pm_panel.legend()
pm_panel.grid(True, alpha=0.3)

sky_panel.scatter(ra, dec, s=1, alpha=0.2, c='gray', label='All')
sky_panel.scatter(ra_filt, dec_filt, s=3, alpha=0.6, c='blue', label='Filtered')
sky_panel.set(xlabel='RA (grados)',
              ylabel='Dec (grados)',
              title='Spatial Distribution')
sky_panel.legend()
sky_panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('efect_of_prefiltering.png', dpi=150, bbox_inches='tight')
plt.show()

# Mean and standard deviation of the astrometry after the pre-filter.
print("\nBasic statistics of filtered data:")
print(f"  Mean Parallax: {np.mean(parallax_filt):.2f} ± {np.std(parallax_filt):.2f} mas")
print(f"  Mean μ_α*: {np.mean(pmra_filt):.2f} ± {np.std(pmra_filt):.2f} mas/yr")
print(f"  Mean μ_δ: {np.mean(pmdec_filt):.2f} ± {np.std(pmdec_filt):.2f} mas/yr")



In [None]:
# =============================================================================
# 2: MEMBERSHIP ANALYSIS WITH DBSCAN (using filtered data)
# =============================================================================

print("\n" + "="*70)
print("2: MEMBERSHIP ANALYSIS WITH DBSCAN ")
print("="*70)

# Feature matrix for clustering: one row per pre-filtered star, with columns
# (pmra, pmdec, parallax).
X_dbscan = np.column_stack([pmra_filt, pmdec_filt, parallax_filt])

# Standardise each column so that the single DBSCAN `eps` radius applies to
# all three features on a comparable scale.
scaler_dbscan = StandardScaler()
X_dbscan_scaled = scaler_dbscan.fit_transform(X_dbscan)

# DBSCAN hyper-parameters (eps is in units of the standardised features).
eps = 0.3
min_samples = 20

print("\nApplying DBSCAN with filtered data...")
print(f"Parámetros ajustados: eps={eps}, min_samples={min_samples}")

# Apply DBSCAN; the label -1 marks noise, which is treated as field stars.
dbscan = DBSCAN(eps=eps, min_samples=min_samples)
labels_dbscan = dbscan.fit_predict(X_dbscan_scaled)

# Analysis of results
is_noise = labels_dbscan == -1
n_clusters_dbscan = len(set(labels_dbscan)) - (1 if -1 in labels_dbscan else 0)
n_noise_dbscan = int(np.count_nonzero(is_noise))

print(f"\nResults from DBSCAN:")
print(f"  Number of clusters identified: {n_clusters_dbscan}")
print(f"  Stars classified as noise (field): {n_noise_dbscan}")
print(f"  Stars in clusters: {len(pmra_filt) - n_noise_dbscan}")

# The main cluster is assumed to be the most populated group (noise excluded).
# This relies on cluster labels being the integers 0 .. n_clusters-1.
if n_clusters_dbscan > 0:
    cluster_sizes = [np.sum(labels_dbscan == i) for i in range(n_clusters_dbscan)]
    main_cluster_id = np.argmax(cluster_sizes)
    mask_members_dbscan = labels_dbscan == main_cluster_id
    print(f"  Main clusters has {cluster_sizes[main_cluster_id]} stars")
else:
    # No cluster found: define main_cluster_id explicitly so later cells never
    # see an undefined name or a stale value from a previous notebook run.
    main_cluster_id = None
    mask_members_dbscan = np.zeros(len(pmra_filt), dtype=bool)

# Four diagnostic panels for the DBSCAN result.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
fig.suptitle('Results of the analysis with DBSCAN', fontsize=16, fontweight='bold')
(pm_panel, sky_panel), (plx_mag_panel, plx_hist_panel) = axes

# Per-star colour: grey for noise, red for the main cluster, green for any
# other cluster. The noise test comes first, so main_cluster_id is only
# consulted for stars that belong to some cluster.
colors_dbscan = []
for star_label in labels_dbscan:
    if star_label == -1:
        colors_dbscan.append('grey')
    elif star_label == main_cluster_id:
        colors_dbscan.append('red')
    else:
        colors_dbscan.append('green')

# Proper-motion plane.
pm_panel.scatter(pmra_filt, pmdec_filt, c=colors_dbscan, s=2, alpha=0.6)
pm_panel.set(xlabel='μ_α* (mas/yr)',
             ylabel='μ_δ (mas/yr)',
             title='Proper motions (red=cluster, grey=field)')

# Positions on the sky.
sky_panel.scatter(ra_filt, dec_filt, c=colors_dbscan, s=2, alpha=0.6)
sky_panel.set(xlabel='RA (degrees)',
              ylabel='Dec (degrees)',
              title='Spatial distribution')

# Parallax against G magnitude; the y axis is flipped so bright stars are on top.
plx_mag_panel.scatter(parallax_filt, gmag_filt, c=colors_dbscan, s=2, alpha=0.6)
plx_mag_panel.set(xlabel='Parallax (mas)', ylabel='G Magnitude')
plx_mag_panel.invert_yaxis()
plx_mag_panel.set_title('Diagram parallax-magnitude')

# Parallax histograms: noise stars versus main-cluster members (if any).
plx_hist_panel.hist(parallax_filt[labels_dbscan == -1], bins=30, alpha=0.5,
                    label='Field', color='grey', edgecolor='black')
if np.sum(mask_members_dbscan) > 0:
    plx_hist_panel.hist(parallax_filt[mask_members_dbscan], bins=30, alpha=0.5,
                        label='Cluster', color='red', edgecolor='black')
plx_hist_panel.set(xlabel='Parallax (mas)',
                   ylabel='Number of stars',
                   title='Comparison of parallax')
plx_hist_panel.legend()

plt.tight_layout()
plt.savefig('dbscan_results.png', dpi=150, bbox_inches='tight')
plt.show()



In [None]:

# ============================================================================
# 3: Photometry of original data
# ============================================================================

# Gaia photometry for the full downloaded sample.
bp_mag = np.array(data['phot_bp_mean_mag'])
rp_mag = np.array(data['phot_rp_mean_mag'])
g_mag = np.array(data['phot_g_mean_mag'])

# BP-RP colour index.
color = bp_mag - rp_mag

# Restrict the photometry to the stars that passed the pre-filter.
bp_mag_filt, rp_mag_filt, color_filt = (
    band[mask_prefilter] for band in (bp_mag, rp_mag, color)
)

# Only stars with a finite colour and a finite G magnitude can be plotted.
mask_valid_phot = np.isfinite(color_filt) & np.isfinite(gmag_filt)

# Split the plottable stars into DBSCAN members and everything else.
mask_members_dbscan_phot = mask_valid_phot & mask_members_dbscan
mask_field_phot = mask_valid_phot & ~mask_members_dbscan

fig, ax = plt.subplots(1, 1, figsize=(8, 10))

# Background layer: non-members as faint blue dots.
field_style = dict(s=1, alpha=0.15, c='blue', label='Field', zorder=1)
ax.scatter(color_filt[mask_field_phot], gmag_filt[mask_field_phot], **field_style)

# Foreground layer: DBSCAN members in red.
member_style = dict(
    s=8, alpha=0.7, c='red', edgecolors='darkgreen', linewidth=0.3,
    label=f'DBSCAN ({np.sum(mask_members_dbscan_phot)} stars)',
    zorder=2,
)
ax.scatter(color_filt[mask_members_dbscan_phot],
           gmag_filt[mask_members_dbscan_phot],
           **member_style)

# Axis labels, title and the inverted magnitude axis (bright stars on top).
ax.set_xlabel('BP - RP (mag)', fontsize=14, fontweight='bold')
ax.set_ylabel('G (mag)', fontsize=14, fontweight='bold')
ax.set_title('Color-Magnitude Diagram - M37)\nMethod: DBSCAN',
             fontsize=16, fontweight='bold', pad=20)
ax.invert_yaxis()
ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)
ax.legend(loc='upper left', fontsize=11, framealpha=0.9)

plt.tight_layout()
plt.savefig('cmd_dbscan_ngc2099.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# ============================================================================
# 4. Detailed CMD
# ============================================================================

# Stars with valid photometry: finite BP-RP colour and finite G magnitude.
mask_valid_phot = np.isfinite(color_filt) & np.isfinite(gmag_filt)

# Number of pre-filtered stars that cannot be placed on the CMD.
n_invalid = np.sum(~mask_valid_phot)

# Plottable stars, split into DBSCAN members and the remainder.
mask_members_dbscan_phot = mask_valid_phot & mask_members_dbscan
mask_field_phot = mask_valid_phot & ~mask_members_dbscan

# Base CMD that the annotations further down are drawn onto.
fig, ax = plt.subplots(1, 1, figsize=(12, 14))

# Background layer: non-members as faint blue dots.
field_style = dict(s=1, alpha=0.15, c='blue', label='Field', zorder=1)
ax.scatter(color_filt[mask_field_phot], gmag_filt[mask_field_phot], **field_style)

# Foreground layer: DBSCAN members in red.
member_style = dict(
    s=8, alpha=0.7, c='red', edgecolors='darkgreen', linewidth=0.3,
    label=f'DBSCAN ({np.sum(mask_members_dbscan_phot)} stars)',
    zorder=2,
)
ax.scatter(color_filt[mask_members_dbscan_phot],
           gmag_filt[mask_members_dbscan_phot],
           **member_style)

# Axis labels, title and the inverted magnitude axis (bright stars on top).
ax.set_xlabel('BP - RP (mag)', fontsize=14, fontweight='bold')
ax.set_ylabel('G (mag)', fontsize=14, fontweight='bold')
ax.set_title('Colour-Magnitude - (M37)\nMethod: DBSCAN',
             fontsize=16, fontweight='bold', pad=20)
ax.invert_yaxis()
ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)
ax.legend(loc='upper right', fontsize=11, framealpha=0.9)


# CMD regions, defined as fixed boxes in the (BP-RP, G) plane and applied only
# to DBSCAN members with valid photometry. The boxes overlap: the red-clump
# box lies inside the red-giant box, and the turn-off box overlaps the bright
# end of the main-sequence box.
cmd_members = mask_members_dbscan_phot

# Main sequence: BP-RP > 0.5 and G > 11.5.
mask_ms = cmd_members & (color_filt > 0.5) & (gmag_filt > 11.5)

# Turn-off region: 0.3 < BP-RP < 0.7 and 9.5 < G < 12.0.
mask_turnoff = (
    cmd_members
    & (color_filt > 0.3) & (color_filt < 0.7)
    & (gmag_filt > 9.5) & (gmag_filt < 12.0)
)

# Red giants: BP-RP > 1.0 and G < 11.5.
mask_giants = cmd_members & (color_filt > 1.0) & (gmag_filt < 11.5)

# Red clump: 1.2 < BP-RP < 1.6 and 9.5 < G < 11.5.
mask_redclump = (
    cmd_members
    & (color_filt > 1.2) & (color_filt < 1.6)
    & (gmag_filt > 9.5) & (gmag_filt < 11.5)
)

# Locate and mark the turn-off point; needs more than 5 stars in the box.
if np.sum(mask_turnoff) > 5:
    turnoff_colors = color_filt[mask_turnoff]
    turnoff_mags = gmag_filt[mask_turnoff]

    # Candidates are the bluest stars (below the 30th colour percentile).
    # If that selection is empty, every star in the box is a candidate.
    blue_threshold = np.percentile(turnoff_colors, 30)
    mask_blue = turnoff_colors < blue_threshold

    if mask_blue.any():
        candidate_colors = turnoff_colors[mask_blue]
        candidate_mags = turnoff_mags[mask_blue]
    else:
        candidate_colors = turnoff_colors
        candidate_mags = turnoff_mags

    # The turn-off is the brightest candidate (smallest G magnitude).
    idx_turnoff = np.argmin(candidate_mags)
    turnoff_color = candidate_colors[idx_turnoff]
    turnoff_mag = candidate_mags[idx_turnoff]

    # Star marker at the turn-off position.
    ax.scatter([turnoff_color], [turnoff_mag], s=300, c='red', marker='*',
               edgecolors='darkred', linewidth=2.5, zorder=5)

    # Call-out box placed up and to the left of the marker.
    ax.annotate(
        'TURN-OFF POINT\n(stars leaving the main sequence)\nM ≈ 2.0-2.5 M☉',
        xy=(turnoff_color, turnoff_mag),
        xytext=(turnoff_color - 0.4, turnoff_mag - 2.0),
        fontsize=11, fontweight='bold', color='black',
        bbox={'boxstyle': 'round,pad=0.5', 'facecolor': 'gray', 'alpha': 0.8},
        arrowprops={'arrowstyle': '->', 'color': 'black', 'lw': 2.5},
    )

    # The position is measured; the mass and age lines are fixed text.
    print(f"\n{'─'*70}")
    print(f"Turn-off point identified:")
    print(f"{'─'*70}")
    print(f"  Position: BP-RP = {turnoff_color:.3f} mag, G = {turnoff_mag:.2f} mag")
    print(f"  Estimated mass: ~2.0-2.5 M☉")
    print(f"  Life in Main Sequence: ~500-800 Myr")
    print(f"  Cluster age: ~500-700 Myr")

# Label the main sequence, anchored at the median colour and magnitude of its
# stars; drawn only when the region holds more than 50 stars.
if np.sum(mask_ms) > 50:
    ms_color_median = np.median(color_filt[mask_ms])
    ms_mag_median = np.median(gmag_filt[mask_ms])

    ax.annotate(
        'MAIN SEQUENCE\n(burning H in the core)\nM < 2.0 M☉',
        xy=(ms_color_median, ms_mag_median),
        xytext=(ms_color_median + 0.6, ms_mag_median + 1.5),
        fontsize=11, fontweight='bold', color='black',
        bbox={'boxstyle': 'round,pad=0.5', 'facecolor': 'gray', 'alpha': 0.8},
        arrowprops={'arrowstyle': '->', 'color': 'black', 'lw': 2.5},
    )

# Label the red-giant region at its median colour and magnitude and print a
# short summary; requires more than 5 stars in the region.
if np.sum(mask_giants) > 5:
    giants_color_median = np.median(color_filt[mask_giants])
    giants_mag_median = np.median(gmag_filt[mask_giants])

    ax.annotate(
        'BRANCH OF RED GIANTS\n(Post-SP, H in shell)\ninitial mass > 2.5 M☉',
        xy=(giants_color_median, giants_mag_median),
        xytext=(giants_color_median + 0.4, giants_mag_median - 1.5),
        fontsize=11, fontweight='bold', color='black',
        bbox={'boxstyle': 'round,pad=0.5', 'facecolor': 'gray', 'alpha': 0.8},
        arrowprops={'arrowstyle': '->', 'color': 'black', 'lw': 2.5},
    )

    # The values printed as "Mean" are the medians computed above.
    print(f"\n{'─'*70}")
    print(f"Red Giants:")
    print(f"{'─'*70}")
    print(f"  Mean BP-RP = {giants_color_median:.2f} mag")
    print(f"  Mean Magnitude G = {giants_mag_median:.1f} mag")
    print(f"  Number: {np.sum(mask_giants)} stars")
    print(f"  Original mass: 2.5-3.5 M☉ (already evolutionated)")

# Highlight the red clump with a translucent ellipse centred on its median
# colour and magnitude, plus a call-out; requires more than 10 stars.
if np.sum(mask_redclump) > 10:
    rc_color_median = np.median(color_filt[mask_redclump])
    rc_mag_median = np.median(gmag_filt[mask_redclump])

    ellipse = Ellipse(
        (rc_color_median, rc_mag_median),
        width=0.35, height=0.9, angle=0,
        alpha=0.25, facecolor='lightblue',
        edgecolor='blue', linewidth=2.5, zorder=3,
    )
    ax.add_patch(ellipse)

    ax.annotate(
        'Red Clump\n(Burning He in the core)',
        xy=(rc_color_median, rc_mag_median),
        xytext=(rc_color_median - 0.7, rc_mag_median + 0.8),
        fontsize=10, fontweight='bold', color='black',
        bbox={'boxstyle': 'round,pad=0.4', 'facecolor': 'gray', 'alpha': 0.8},
        arrowprops={'arrowstyle': '->', 'color': 'black', 'lw': 1.8},
    )

    print(f"\n{'─'*70}")
    print(f"Red Clump detected:")
    print(f"{'─'*70}")
    print(f"  Position: BP-RP = {rc_color_median:.2f}, G = {rc_mag_median:.1f}")
    print(f"  Number: {np.sum(mask_redclump)} estrellas")
    print(f"  Phase: Burning He in the core")

# Approximate stellar-mass labels along the sequence. Each entry gives the
# label text and the hand-picked (BP-RP, G) position where it is drawn.
mass_labels = [
    {'mass': '3.0 M☉', 'colour': 0.35, 'mag': 9.5},
    {'mass': '2.5 M☉', 'colour': 0.42, 'mag': 10.0},
    {'mass': '2.0 M☉', 'colour': 0.50, 'mag': 10.8},
    {'mass': '1.5 M☉', 'colour': 0.65, 'mag': 11.8},
    {'mass': '1.0 M☉', 'colour': 0.90, 'mag': 13.0},
    {'mass': '0.8 M☉', 'colour': 1.15, 'mag': 14.2},
]

for ml in mass_labels:
    # Members within ±0.2 mag in colour and ±1.0 mag in G of the label
    # position. The key is 'colour' (as spelled in the dicts above); the
    # original lookup used 'color' and raised KeyError on the first entry.
    nearby = mask_members_dbscan_phot & \
             (np.abs(color_filt - ml['colour']) < 0.2) & \
             (np.abs(gmag_filt - ml['mag']) < 1.0)

    # Draw the label only where the cluster actually populates the CMD.
    if np.sum(nearby) > 3:
        ax.text(ml['colour'] - 0.08, ml['mag'], ml['mass'],
                fontsize=8, style='italic', color='black', alpha=0.8,
                ha='right', va='center',
                bbox=dict(boxstyle='round,pad=0.25',
                          facecolor='white', alpha=0.7, edgecolor='black', linewidth=0.8))

# Summary box in the lower-right corner of the CMD; drawn only when at least
# one member has valid photometry.
if mask_members_dbscan_phot.any():
    member_colors = color_filt[mask_members_dbscan_phot]
    member_mags = gmag_filt[mask_members_dbscan_phot]

    color_mean = np.mean(member_colors)
    color_std = np.std(member_colors)
    mag_min = np.min(member_mags)
    mag_max = np.max(member_mags)

    # Each piece carries its own trailing newline, except the last one.
    stats_text = ''.join([
        f'═══ DBSCAN ═══\n',
        f'Total: {np.sum(mask_members_dbscan_phot)}\n',
        f'⟨BP-RP⟩ = {color_mean:.2f} ± {color_std:.2f}\n',
        f'G: {mag_min:.1f} - {mag_max:.1f}\n',
        f'───────────\n',
        f'Giants: {np.sum(mask_giants)}\n',
        f'Turn-off: {np.sum(mask_turnoff)}\n',
        f'Main Sequence: {np.sum(mask_ms)}',
    ])

    # Positioned in axes-fraction coordinates so it stays in the corner.
    box_style = dict(boxstyle='round', facecolor='white', alpha=0.9,
                     edgecolor='black', linewidth=1.5)
    ax.text(0.98, 0.02, stats_text,
            transform=ax.transAxes,
            fontsize=9.5, family='monospace',
            verticalalignment='bottom',
            horizontalalignment='right',
            bbox=box_style)

plt.tight_layout()
plt.savefig('cmd_dbscan_ngc2099_annotated.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:

# ============================================================================
# 5. Detailed Analysis
# ============================================================================

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Detailed CMD Analysis- DBSCAN NGC 2099',
             fontsize=14, fontweight='bold')

# Colour and magnitude of the DBSCAN members with valid photometry, shared by
# the first three panels.
member_colors = color_filt[mask_members_dbscan_phot]
member_mags = gmag_filt[mask_members_dbscan_phot]
color_panel, lum_panel, hess_panel = axes[0, 0], axes[0, 1], axes[1, 0]

# Panel 1: BP-RP histogram with the median marked by a dashed line.
color_panel.hist(member_colors, bins=40, alpha=0.7,
                 color='red', edgecolor='black', linewidth=1.2)
color_panel.set_xlabel('BP - RP (mag)', fontsize=11)
color_panel.set_ylabel('Number of stars', fontsize=11)
color_panel.set_title('Distribution of colours', fontsize=12, fontweight='bold')
median_color = np.median(member_colors)
color_panel.axvline(median_color, color='red', linestyle='--', linewidth=2,
                    label=f'Median = {median_color:.2f}')
color_panel.legend(fontsize=10)
color_panel.grid(True, alpha=0.3)

# Panel 2: luminosity function as a horizontal G-magnitude histogram, with
# the magnitude axis inverted so bright stars are on top.
lum_panel.hist(member_mags, bins=40, alpha=0.7,
               color='red', edgecolor='black', linewidth=1.2,
               orientation='horizontal')
lum_panel.set_ylabel('G (mag)', fontsize=11)
lum_panel.set_xlabel('Number of stars', fontsize=11)
lum_panel.set_title('Luminosity function', fontsize=12, fontweight='bold')
lum_panel.invert_yaxis()
lum_panel.grid(True, alpha=0.3)

# Panel 3: Hess diagram (2-D histogram of the CMD) on a logarithmic colour scale.
h = hess_panel.hist2d(member_colors, member_mags,
                      bins=50, cmap='YlOrRd',
                      norm=plt.matplotlib.colors.LogNorm())
hess_panel.set_xlabel('BP - RP (mag)', fontsize=11)
hess_panel.set_ylabel('G (mag)', fontsize=11)
hess_panel.set_title('Hess Diagram', fontsize=12, fontweight='bold')
hess_panel.invert_yaxis()
plt.colorbar(h[3], ax=hess_panel, label='log(N)')

# 4. Main sequence width
# A straight line is fitted to G versus BP-RP for the main-sequence members;
# the scatter of the residuals about that line is reported as the sequence
# width. Requires more than 50 main-sequence stars.
if np.sum(mask_ms) > 50:
    ms_colors = color_filt[mask_ms]
    ms_mags = gmag_filt[mask_ms]

    # Linear fit (degree-1 polynomial) and its residuals
    coeffs = np.polyfit(ms_colors, ms_mags, 1)
    ms_fit = np.poly1d(coeffs)
    residuals = ms_mags - ms_fit(ms_colors)
    sigma_res = np.std(residuals)

    axes[1, 1].hist(residuals, bins=30, alpha=0.7,
                    color='red', edgecolor='black', linewidth=1.2)
    axes[1, 1].set_xlabel('Residual (mag)', fontsize=11)
    axes[1, 1].set_ylabel('Number of stars', fontsize=11)
    axes[1, 1].set_title('Main sequence width', fontsize=12, fontweight='bold')
    axes[1, 1].axvline(0, color='red', linestyle='--', linewidth=2)

    # 2.355 ≈ 2*sqrt(2*ln 2): converts a Gaussian sigma into a FWHM.
    stats_box = f'σ = {sigma_res:.3f} mag\nFWHM = {2.355*sigma_res:.3f} mag'
    axes[1, 1].text(0.05, 0.95, stats_box,
                    transform=axes[1, 1].transAxes,
                    fontsize=11, verticalalignment='top',
                    bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.8))
    axes[1, 1].grid(True, alpha=0.3)

    print(f"\n{'='*70}")
    print(f"Main sequence width:")
    print(f"{'='*70}")
    print(f"  Dispersion (σ): {sigma_res:.4f} mag")
    print(f"  FWHM: {2.355*sigma_res:.4f} mag")

    # Qualitative verdict from fixed thresholds on the residual scatter.
    # The original used typographic quotes here, which is a SyntaxError.
    if sigma_res < 0.15:
        print("  EXCELLENT: Very narrow sequence")
        print("    Uniform age CONFIRMED")
        print("    Age dispersion < 50-100 Myr")
    elif sigma_res < 0.25:
        print("  GOOD: Narrow sequence")
        print("    Age mostly uniform")
    else:
        print(f"  WARNING: Wide sequence")
        print(f"    Possible significant age dispersion")
else:
    # Too few main-sequence stars: show a placeholder message in the panel.
    axes[1, 1].text(0.5, 0.5, 'IInsufficient stars\nin SP for analysis',
                    ha='center', va='center', fontsize=12,
                    transform=axes[1, 1].transAxes)

plt.tight_layout()
plt.savefig('cmd_dbscan_analisis_ngc2099.png', dpi=200, bbox_inches='tight')
plt.show()

print(f"\n{'='*70}")
print("COMPLETED CMD Analysis")
print(f"{'='*70}\n")