In [None]:
# Notebook-wide dependencies: array math (numpy), data frames (polars),
# spatial neighbour search (scipy KDTree) and IPython output control.
from IPython.display import clear_output
import numpy as np
import polars as pl
from scipy.spatial import KDTree
import warnings
# Suppress every warning for the rest of the kernel session.
warnings.filterwarnings('ignore')
# Clear whatever this cell has written to its output area so far.
clear_output()

In [None]:
def upload_file(filename, prompt, columns_names):
    """Read a parquet catalog, rename its columns by position and cast them.

    Parameters
    ----------
    filename
        Path handed to ``pl.read_parquet``.
    prompt
        Message printed before the file is read.
    columns_names
        New column names, applied to the file's columns in order. Exactly
        nine names are supported: the first seven columns are cast to
        ``pl.Float64`` and the last two to ``pl.String``.

    Returns
    -------
    tuple
        ``(dataframe, filename)`` - the renamed and cast frame together with
        the unchanged ``filename`` argument.

    Raises
    ------
    ValueError
        If the file's column count differs from ``len(columns_names)``, or
        if the number of names is not nine.
    """
    print(prompt)

    df = pl.read_parquet(filename)

    # The rename below is purely positional, so both sides must line up.
    if len(df.columns) != len(columns_names):
        raise ValueError(f"Число колонок в файле ({len(df.columns)}) не совпадает с числом имен ({len(columns_names)})")

    renamed = df.rename(dict(zip(df.columns, columns_names)))

    # Only the nine-column layout has a known dtype mapping.
    if len(columns_names) != 9:
        raise ValueError(f"Неподдерживаемое число колонок: {len(columns_names)}")

    # Positions 0-6 are numeric, positions 7-8 are textual.
    target_dtypes = {
        columns_names[position]: (pl.Float64 if position < 7 else pl.String)
        for position in range(9)
    }
    casts = [pl.col(name).cast(dtype) for name, dtype in target_dtypes.items()]

    return renamed.with_columns(casts), filename

In [None]:
def calculate_angular_distance(ra1_deg, dec1_deg, ra2_deg, dec2_deg):
    """Great-circle separation between two sky positions, in degrees.

    Uses the Vincenty (``atan2``) form of the spherical distance formula.
    The ``arccos`` form used previously loses all precision for small
    separations: offsets below roughly 1e-6 degrees collapsed to 0, and
    identical positions could come out slightly above 0.

    Parameters
    ----------
    ra1_deg, dec1_deg
        Right ascension and declination of the first position(s), degrees.
        Scalars or array-likes; standard numpy broadcasting applies.
    ra2_deg, dec2_deg
        Right ascension and declination of the second position(s), degrees.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Angular separation in degrees, in the range [0, 180]. Identical
        input positions give exactly 0.
    """
    dec1_rad = np.radians(dec1_deg)
    dec2_rad = np.radians(dec2_deg)

    # Take the short way around the RA circle while still in degrees: for
    # inputs within one turn of each other this keeps e.g. 359.5 vs 0.5 an
    # exact 1-degree difference instead of a rounded 359-degree one.
    delta_ra = np.abs(np.subtract(ra1_deg, ra2_deg))
    delta_ra = np.minimum(delta_ra, 360 - delta_ra)
    delta_ra_rad = np.radians(delta_ra)

    sin_dec1, cos_dec1 = np.sin(dec1_rad), np.cos(dec1_rad)
    sin_dec2, cos_dec2 = np.sin(dec2_rad), np.cos(dec2_rad)
    sin_dra, cos_dra = np.sin(delta_ra_rad), np.cos(delta_ra_rad)

    # numerator = |sin(distance)|, denominator = cos(distance); atan2 of the
    # two stays well-conditioned at every separation, including 0 and 180.
    numerator = np.hypot(
        cos_dec2 * sin_dra,
        cos_dec1 * sin_dec2 - sin_dec1 * cos_dec2 * cos_dra,
    )
    denominator = sin_dec1 * sin_dec2 + cos_dec1 * cos_dec2 * cos_dra

    return np.degrees(np.arctan2(numerator, denominator))

In [None]:
def compute_distances(coords_cat, coords_cols, mag_lim=9, theta_max=0.7, filtering_coords=False):
    """Build a catalog of angular distances between nearby catalog stars.

    Every pair of rows whose angular separation is greater than 0 and at
    most ``theta_max`` degrees produces one output row.

    Neighbour candidates are found with a KD-tree over 3-D unit vectors on
    the sphere. Searching directly in the flat (RA, Dec) plane misses real
    neighbours across the RA 0/360 seam and at high declination, where a
    small angular distance corresponds to a large RA difference.

    Parameters
    ----------
    coords_cat
        polars DataFrame with the columns listed in ``coords_cols`` plus
        ``Name_a`` and ``Name_b`` (and ``Gmag`` when ``filtering_coords`` is
        True).
    coords_cols
        Names of the coordinate columns; the first is used as right
        ascension and the second as declination, both in degrees.
    mag_lim
        Magnitude cut; only applied when ``filtering_coords`` is True.
    theta_max
        Maximum angular separation of a reported pair, in degrees.
    filtering_coords
        When True, keep only rows with ``Gmag < mag_lim`` before pairing.

    Returns
    -------
    polars.DataFrame
        Columns ``Dist`` (degrees), ``Name1_a``, ``Name1_b``, ``Name2_a``,
        ``Name2_b``; duplicate rows are removed.
    """
    print("Calculating the distances...")

    output_schema = {
        'Dist': pl.Float64,
        'Name1_a': pl.String,
        'Name1_b': pl.String,
        'Name2_a': pl.String,
        'Name2_b': pl.String
    }

    # Keep only stars with Gmag < mag_lim when the filtering flag is set.
    filtered_df = coords_cat.filter(pl.col('Gmag') < mag_lim) if filtering_coords else coords_cat

    coords = filtered_df.select(coords_cols).to_numpy()
    names = filtered_df.select(['Name_a', 'Name_b']).to_numpy()

    # Fewer than two stars, or a non-positive search radius, cannot yield
    # any pair; skip building the tree altogether.
    if coords.shape[0] < 2 or theta_max <= 0:
        return pl.DataFrame(schema=output_schema)

    # Project (RA, Dec) onto the unit sphere so that Euclidean (chord)
    # distance is a monotonic function of the true angular separation.
    ra_rad = np.radians(coords[:, 0])
    dec_rad = np.radians(coords[:, 1])
    cos_dec = np.cos(dec_rad)
    unit_vectors = np.column_stack((
        cos_dec * np.cos(ra_rad),
        cos_dec * np.sin(ra_rad),
        np.sin(dec_rad),
    ))

    # Chord length subtended by theta_max, slightly enlarged so rounding
    # cannot drop boundary pairs; the exact cut is applied on theta below.
    chord_radius = 2.0 * np.sin(np.radians(min(theta_max, 180.0)) / 2.0)
    tree = KDTree(unit_vectors)
    pairs = tree.query_pairs(r=chord_radius * (1.0 + 1e-9), output_type='ndarray')

    # Sort candidate pairs so the output order does not depend on the
    # tree's internal traversal order.
    pairs = pairs[np.lexsort((pairs[:, 1], pairs[:, 0]))]

    i, j = pairs[:, 0], pairs[:, 1]
    theta = calculate_angular_distance(coords[i, 0], coords[i, 1], coords[j, 0], coords[j, 1])

    # Drop coincident positions (theta == 0) and anything beyond theta_max.
    valid = (theta > 0) & (theta <= theta_max)
    i, j, theta = i[valid], j[valid], theta[valid]

    # Assemble the frame column-wise instead of one Python dict per pair.
    dists_cat = pl.DataFrame(
        {
            'Dist': theta,
            'Name1_a': names[i, 0].tolist(),
            'Name1_b': names[i, 1].tolist(),
            'Name2_a': names[j, 0].tolist(),
            'Name2_b': names[j, 1].tolist(),
        },
        schema=output_schema,
    )

    return dists_cat.unique(keep="first", maintain_order=True)

In [None]:
# Load the coordinate catalog

coords_col_name = [
    'X', 'Y', 'Gmag',
    'RA', 'Dec',
    'RA_IRAF', 'Dec_IRAF',
    'Name_a', 'Name_b',
]
coords_catalog, coords_catalog_filename = upload_file(
    filename="coords_g11.parquet",
    prompt="Uploading the coordinate catalog...",
    columns_names=coords_col_name,
)

# Short summary: row count followed by the first rows of the catalog.
print(
    f"Number of stars: {len(coords_catalog)}",
    "Coordinate catalog head:",
    coords_catalog.head(),
    sep="\n",
)


In [None]:
# Usage: compute the pair-distance catalog and write it to disk

Mag_lim = 10
Theta_max = 0.7
coords_used_cols = ['RA', 'Dec']

# NOTE(review): with filtering_coords=False the Gmag cut is not applied inside
# compute_distances, yet Mag_lim still names the output file - confirm that
# this is intended.
dists_catalog = compute_distances(
    coords_cat=coords_catalog,
    coords_cols=coords_used_cols,
    mag_lim=Mag_lim,
    theta_max=Theta_max,
    filtering_coords=False,
)

dists_catalog.write_parquet(f"dists_g{Mag_lim}.parquet")

# Short summary: row count followed by the first rows of the result.
print(
    f"Number of distances: {len(dists_catalog)}",
    "Distance catalog head:",
    dists_catalog.head(),
    sep="\n",
)

In [None]:
# Filter the coordinate catalog stars by Gmag, if needed

Max_mag = 10
output_file = f"coords_g{Max_mag}.parquet"

# Keep only the stars with Gmag below the limit and store that subset.
filtered_coords = coords_catalog.filter(pl.col('Gmag') < Max_mag)
filtered_coords.write_parquet(output_file)

print(f"Filtered catalog saved to {output_file}\nNumber of stars: {len(filtered_coords)}")
print("Filtered catalog head:", filtered_coords.head(), sep="\n")