In [1]:
# Libraries

# Save to json files
import json

# Filesystem paths
from pathlib import Path

# For plots
import matplotlib.pyplot as plt

# Numerical Tools
import numpy as np

# Coordinates manipulation
from astropy.coordinates import SkyCoord

# Sky visualization
from ipyaladin import Aladin  # version >=0.4.0

# Moc and HEALPix tools
from mocpy import MOC

# Access astronomical databases
from pyvo import registry  # version >=1.6

In [2]:
# The catalogue name in VizieR. This constant is used below to build the
# registry identifier (ivoid) of the catalogue.
CATALOGUE = "J/ApJS/266/14"

We first retrieve the catalogue information.

In [3]:
# Every resource in the VO carries an identifier called an "ivoid". For a
# VizieR catalogue it is the catalogue name appended to the VizieR prefix.
catalogue_ivoid = f"ivo://CDS.VizieR/{CATALOGUE}"
# Query the registry for that identifier and keep the first match.
registry_matches = registry.search(ivoid=catalogue_ivoid)
voresource = registry_matches[0]

In [4]:
# Print metadata about the catalogue. The recorded output below lists the
# title, short name, IVOA identifier and the available access modes.
voresource.describe(verbose=True)

Am stars selected in LAMOST DR8-DR10
Short Name: J/ApJS/266/14
IVOA Identifier: ivo://cds.vizier/j/apjs/266/14
Access modes: conesearch, tap#aux
- tap#aux: https://tapvizier.cds.unistra.fr/TAPVizieR/tap
- conesearch: https://vizier.cds.unistra.fr/viz-
 bin/conesearch/J/ApJS/266/14/table1?, description: Cone search capability for
 table J/ApJS/266/14/table1 (The new catalog of the Am candidates based on
 LAMOST)
- conesearch: https://vizier.cds.unistra.fr/viz-
 bin/conesearch/J/ApJS/266/14/table2?, description: Cone search capability for
 table J/ApJS/266/14/table2 (The new catalog of eclipsing Am binary candidates
 based on LAMOST)
- conesearch: https://vizier.cds.unistra.fr/viz-
 bin/conesearch/J/ApJS/266/14/table3?, description: Cone search capability for
 table J/ApJS/266/14/table3 (The catalog of the eclipsing Am binary candidates
 based on the known Am catalogs and cross-match results)

A total of about 21,600 Am candidates were detected with the MKCLASS code
based on the low-reso

We can also inspect the `resource` object in detail and access attributes that are not shown by the `describe` method. For example, here is the first author of the resource:

In [5]:
# First entry of the resource's `creators` list, i.e. the first author.
voresource.creators[0]

'Tian X.-M.'

## 3. Access the tabular data of this catalog

We can have a look at the tables available in the catalogue.

In [6]:
# Fetch the tables of the catalogue, keyed by table name.
tables = voresource.get_tables()
n_tables = len(tables)
print(f"In this catalogue, we have {n_tables} tables.")
# One line per table: its name followed by its description.
for table_name, table in tables.items():
    summary_line = f"{table_name}: {table.description}"
    print(summary_line)



In this catalogue, we have 3 tables.
J/ApJS/266/14/table1: The new catalog of the Am candidates based on LAMOST
J/ApJS/266/14/table2: The new catalog of eclipsing Am binary candidates based on LAMOST
J/ApJS/266/14/table3: The catalog of the eclipsing Am binary candidates based on the known Am catalogs and cross-match results


In [7]:
# Keep the table names in a list so they can be indexed in later cells.
tables_names = [table_key for table_key in tables.keys()]
tables_names

['J/ApJS/266/14/table1', 'J/ApJS/266/14/table2', 'J/ApJS/266/14/table3']

The actual data can then be accessed using any of the ``access_modes`` of the voresource.

In [8]:
# Access modes offered by this resource (see the recorded output below).
voresource.access_modes()

{'conesearch', 'tap#aux'}

The web access is found by following the ``reference_url``.

In [9]:
# URL of the catalogue's web page.
voresource.reference_url

'https://cdsarc.cds.unistra.fr/viz-bin/cat/J/ApJS/266/14'

### 3.1 Execute a SQL/ADQL query

The ``tap#aux`` entry in the ``access_modes`` response indicates that we can also run an SQL/ADQL query on these VizieR tables.

On the first table of the catalogue, we execute an <a href='https://www.ivoa.net/documents/latest/ADQL.html'>ADQL</a> query.

In [17]:
# The first table of the catalogue is the one queried below.
first_table_name = tables_names[0]

# ADQL text: a selection of stellar parameters for every row, ordered by
# record number. The table name is wrapped in double quotes inside the query.
# NOTE(review): an earlier draft of this query also selected uncertainty
# columns (e_Teff1, e_logg1, e_FeH1, e_Vmag, s_Teff2, s_logg2, e_FeH2, s_Rad,
# s_Mass, s_Lum); add them to the SELECT list if they are needed.
adql_query = f"""SELECT recno, subCl, SpT, Teff1, logg1, FeH1, Vmag, Teff2
    , logg2, FeH2, Rad, Mass, Lum FROM "{first_table_name}"
    ORDER BY recno"""

# Execute the query synchronously on the resource's TAP service.
tap_service = voresource.get_service("tap")
tap_records = tap_service.search(adql_query)

# Display the result as a table.
tap_records.to_table()

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


recno,subCl,SpT,Teff1,logg1,FeH1,Vmag,Teff2,logg2,FeH2,Rad,Mass,Lum
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,K,log(cm.s**-2),log(Sun),mag,K,log(cm.s**-2),log(Sun),Rsun,Msun,Lsun
int32,object,object,int16,float64,float64,float64,int16,float64,float64,float64,float64,float64
1,F0,kA7hF3mF2,7080,4.178,-0.148,10.907,7078,3.475,-0.151,3.797,1.57,32.60238
2,A6IV,kA7hF0mF0,7404,4.069,0.031,10.41,7405,4.2137,0.032,1.684,1.691,7.68098
3,F0,kA7hF1mF0,7330,4.102,-0.133,14.155,7331,4.3672,-0.133,1.398,1.66,5.08514
4,A7V,kA5hA4mA9,7218,4.182,-0.3,13.497,7220,4.2537,-0.301,1.574,1.62,6.06511
5,A7V,kA2hA8mA9,--,--,--,12.552,7667,3.9027,--,2.485,1.8,19.22473
6,A1IV,kA1hA2mA3,--,--,--,12.677,7915,3.6621,--,3.368,1.9,40.1118
7,A7V,kA4hA8mA9,--,--,--,--,--,--,--,--,--,--
8,A3IV,kA3hA8mA7,--,--,--,13.114,7079,3.2688,-0.465,4.815,1.57,52.46422
9,F0,kA6hA9mF0,7500,3.9,0.185,11.148,7495,--,0.181,2.564,--,--
...,...,...,...,...,...,...,...,...,...,...,...,...


It is convenient to store the data on disk, so we now write it to a JSON file and to a CSV file.

In [18]:
# Convert the TAP result to a table, then to a pandas DataFrame.
table = tap_records.to_table()
data = table.to_pandas()

# List-of-dicts view of the rows. Missing values keep their NaN/NA markers
# here, so this structure is not what gets written to disk. It is still
# defined in case later cells use it.
data_dict = data.to_dict(orient='records')  # Convert to list of dictionaries

# Save to JSON file.
# json.dump would write missing values as the bare token NaN, which is not
# valid JSON and is rejected by strict parsers. DataFrame.to_json writes them
# as null instead.
data.to_json('star.json', orient='records', indent=4)

# Save to CSV. Create the target directory first so the cell also runs on a
# fresh checkout where the folder does not exist yet.
csv_path = Path('data_lakehouse/bronze/starB.csv')
csv_path.parent.mkdir(parents=True, exist_ok=True)
data.to_csv(csv_path, index=False)
data

Unnamed: 0,recno,subCl,SpT,Teff1,logg1,FeH1,Vmag,Teff2,logg2,FeH2,Rad,Mass,Lum
0,1,F0,kA7hF3mF2,7080,4.178,-0.148,10.907,7078,3.4750,-0.151,3.797,1.570,32.60238
1,2,A6IV,kA7hF0mF0,7404,4.069,0.031,10.410,7405,4.2137,0.032,1.684,1.691,7.68098
2,3,F0,kA7hF1mF0,7330,4.102,-0.133,14.155,7331,4.3672,-0.133,1.398,1.660,5.08514
3,4,A7V,kA5hA4mA9,7218,4.182,-0.300,13.497,7220,4.2537,-0.301,1.574,1.620,6.06511
4,5,A7V,kA2hA8mA9,,,,12.552,7667,3.9027,,2.485,1.800,19.22473
...,...,...,...,...,...,...,...,...,...,...,...,...,...
21630,21631,F0,kA7hF0mF0,7357,3.913,0.236,13.035,7358,3.7906,0.235,2.723,1.670,19.58132
21631,21632,F0,kA2hF0mF2,7380,3.836,0.559,14.341,6798,3.8803,,2.296,1.460,10.14622
21632,21633,F0,kA7hF2mF0,7061,4.112,-0.371,15.425,6974,4.1349,,1.754,1.530,6.55372
21633,21634,A7,kA3hA5mA7,,,,15.866,6228,4.5680,,0.943,1.200,1.20602


In [19]:
# Count the missing entries in each column.
missing_mask = data.isnull()
missing_values_count = missing_mask.sum()

missing_values_count

recno        0
subCl        0
SpT          0
Teff1     8022
logg1     8023
FeH1      8022
Vmag      1261
Teff2     1877
logg2     3564
FeH2     11824
Rad       2433
Mass      3564
Lum       3563
dtype: int64

In [20]:
# Print the row count and the number of distinct record numbers; equal
# values mean `recno` contains no duplicates.
recno = data['recno']
print(len(recno))
print(recno.nunique())

21635
21635


In [21]:
# Report how many distinct values the two categorical columns take.
n_subclasses = data['subCl'].nunique()
print(f'NUmber of subclasses: {n_subclasses}')
n_spectral_types = data['SpT'].nunique()
print(f'NUmber of SpT: {n_spectral_types}')

NUmber of subclasses: 26
NUmber of SpT: 978


In [22]:
# Print one "<column>: <count> NaNs" line per column.
for column in data.columns:
    nan_count = data[column].isnull().sum()
    print(''.join([column, ': ', str(nan_count), ' NaNs']))

recno: 0 NaNs
subCl: 0 NaNs
SpT: 0 NaNs
Teff1: 8022 NaNs
logg1: 8023 NaNs
FeH1: 8022 NaNs
Vmag: 1261 NaNs
Teff2: 1877 NaNs
logg2: 3564 NaNs
FeH2: 11824 NaNs
Rad: 2433 NaNs
Mass: 3564 NaNs
Lum: 3563 NaNs


In [16]:
# Differences between consecutive Teff1 values in row order. The recorded
# output below contains nan where values are missing.
np.diff(data['Teff1'])

array([ 324.,  -74., -112., ..., -319.,   nan,   nan], shape=(21634,))