In [1]:
from astropy.constants import c
import astropy.units as U
import math
import pyvo
import numpy as np 
import pandas as pd
import os 
import sys
import matplotlib.pyplot as plt

In [2]:
def query_observations(service, member_ous_uid, target_name):
    """Query for all science observations of given member OUS UID and target name, selecting all columns of interest.

    Parameters:
    service (pyvo.dal.TAPService): A TAPService instance for querying the database.
    member_ous_uid (str): The unique identifier for the member OUS to filter observations by.
    target_name (str): The target name to filter observations by.

    Returns:
    pandas.DataFrame: A table of query results.
    """

    query = f"""
            SELECT target_name, member_ous_uid
            FROM ivoa.obscore
            WHERE member_ous_uid = '{member_ous_uid}'
            AND target_name = '{target_name}'
            AND is_mosaic = 'F'
            """

    result = service.search(query).to_table().to_pandas()

    return result

In [3]:
def query_all_targets(service, targets):
    """Query observations for all predefined targets and compile the results into a single DataFrame.

    Parameters:
    service (pyvo.dal.TAPService): A TAPService instance for querying the database.
    targets (list of tuples): A list where each tuple contains (target_name, member_ous_uid).

    Returns:
    pandas.DataFrame: A DataFrame containing the results for all queried targets.
    """
    results = []

    for target_name, member_ous_uid in targets:
        result = query_observations(service, member_ous_uid, target_name)
        results.append(result)

    # Concatenate all DataFrames into a single DataFrame
    df = pd.concat(results, ignore_index=True)

    return df


In [4]:
def query_for_metadata_by_targets(targets, path, service_url: str = "https://almascience.eso.org/tap"):
    """Query for metadata for all predefined targets and compile the results into a single DataFrame.

    Parameters:
    service_url (str): A TAPService http address for querying the database.
    targets (list of tuples): A list where each tuple contains (target_name, member_ous_uid).
    path (str): The path to save the results to.

    Returns:
    pandas.DataFrame: A DataFrame containing the results for all queried targets.
    """
    # Create a TAPService instance (replace 'your_service_url' with the actual URL)
    service = pyvo.dal.TAPService(service_url)
    # Query all targets and compile the results
    df = query_all_targets(service, targets)
    df = df.drop_duplicates(subset='member_ous_uid')
    # Define a dictionary to map existing column names to new names with unit initials
    rename_columns = {
    'target_name': 'ALMA_source_name',

    }
    # Rename the columns in the DataFrame
    df.rename(columns=rename_columns, inplace=True)
    database = df[['ALMA_source_name']]
    database.to_csv(path, index=False)
    return database

In [5]:
def get_science_types(service):
    query = f"""  
            SELECT science_keyword, scientific_category  
            FROM ivoa.obscore  
            WHERE science_observation = 'T'    
            """
    db = service.search(query).to_table().to_pandas()
    science_keywords = db['science_keyword'].unique()
    scientific_category = db['scientific_category'].unique()
    science_keywords = list(filter(lambda x: x != "", science_keywords))
    scientific_category = list(filter(lambda x: x != "", scientific_category))

    unique_keywords = []

    # Iteration through each string in the list
    for keywords_string in science_keywords:
        # Divide the string into words and remove white spaces
        keywords_list = [keyword.strip() for keyword in keywords_string.split(',')]
        # Add words to the unique values list
        unique_keywords.extend(keywords_list)

    # Use set function to obtain unique values
    unique_keywords = sorted(set(unique_keywords))
    unique_keywords = [keyword for keyword in unique_keywords if (
                        keyword != 'Evolved stars: Shaping/physical structure' and
                        keyword != 'Exo-planets' and 
                        keyword != 'Galaxy structure &evolution')]

    return  unique_keywords, scientific_category

In [6]:
def query_by_science_type(service, science_keyword=None, scientific_category=None, band=None):
    """Query for all science observations of given member OUS UID and target name, selecting all columns of interest.

    Parameters:
    service (pyvo.dal.TAPService): A TAPService instance for querying the database.

    Returns:
    pandas.DataFrame: A table of query results.
    """
    if science_keyword == None:
        science_keyword = ""
    if scientific_category == None:
        scientific_category = ""
    if band == None:
        band = ""
    print('Querying for science keyword/s: ', science_keyword)
    print('And scientific category/ies: ', scientific_category)
    print('And band/s: ', band)
    if type(science_keyword) == list and len(science_keyword) == 1:
        science_keyword = science_keyword[0]
        science_keyword_query = f"science_keyword like '%{science_keyword}%'"
    elif type(science_keyword) == list and len(science_keyword) > 1:
        science_keyword = "', '".join(science_keyword)
        science_keyword_query = f"science_keyword in ('{science_keyword}')"
    else:
        science_keyword_query = f"science_keyword like '%{science_keyword}%'"
    if type(scientific_category) == list and len(scientific_category) == 1:
        scientific_category = scientific_category[0]
        scientific_category_query = f"scientific_category like '%{scientific_category}%'"
    elif type(scientific_category) == list and len(scientific_category) > 1:
        scientific_category = "', '".join(scientific_category)
        scientific_category_query = f"scientific_category in ('{scientific_category}')"
    else:
        scientific_category_query = f"scientific_category like '%{scientific_category}%'"
    if type(band) == list and len(band) == 1:
        band = band[0]
        band_query = f"band_list like '%{band}%'"
    elif type(band) == list and len(band) > 1:
        band = "', '".join(band)
        band_query = f"band_list in ('{band}')"
    else:
        band_query = f"band_list like '%{band}%'"

    query = f"""
            SELECT target_name, member_ous_uid
            FROM ivoa.obscore
            WHERE {science_keyword_query}
            AND {scientific_category_query}
            AND is_mosaic = 'F'
            AND {band_query}
            """

    result = service.search(query).to_table().to_pandas()

    return result


In [7]:
def query_for_metadata_by_science_type(path, service_url: str = "https://almascience.eso.org/tap"):
    service = pyvo.dal.TAPService(service_url)
    science_keywords, scientific_categories = get_science_types(service)
    print('Available science keywords:')
    for i in range(len(science_keywords)):
        print(f'{i}: {science_keywords[i]}')
    print('Available scientific categories:')
    for i in range(len(scientific_categories)):
        print(f'{i}: {scientific_categories[i]}')
    science_keyword_number = input('Select the Science Keyword by number, if you want to select multiple numbers separate them by a space, leave empty for all: ' )
    scientific_category_number = input('Select the Scientific Category by number, if you want to select multiple numbers separate them by a space, leave empty for all: ' )
    band = input('Select observing bands, if you want to select multiple bands separate them by a space, leave empty for all: ')
    if science_keyword_number == "":
        science_keyword = None
    else:
        science_keyword_number = [int(x) for x in science_keyword_number.split(' ') if x != '']
        science_keyword = [science_keywords[i] for i in science_keyword_number]

    duplicates = ['Evolved stars: Shaping/physical structure', 'Exo-planets', 'Galaxy structure &evolution']
    original = ['Evolved stars - Shaping/physical structure', 'Exoplanets', 'Galaxy structure & evolution']
    for i in range(len(original)):
        if original[i] in [science_keyword]:
            science_keywords.append(duplicates[i])
    if scientific_category_number == "":
        scientific_category = None
    else:
        scientific_category_number = [int(x) for x in scientific_category_number.split(' ') if x != '']
        scientific_category = [scientific_categories[i] for i in scientific_category_number]
    if band == "":
        bands = None
    else:
        bands = [int(x) for x in band.split(' ') if x != '']
    df = query_by_science_type(service, science_keyword, scientific_category, bands)
    df = df.drop_duplicates(subset='member_ous_uid')
    
    # Define a dictionary to map existing column names to new names with unit initials
    rename_columns = {
    'target_name': 'ALMA_source_name',

    }
    # Rename the columns in the DataFrame
    df.rename(columns=rename_columns, inplace=True)
    database = df[['ALMA_source_name']]
    database.to_csv(path, index=False)
    return database


In [8]:
parent_dir = os.path.dirname(os.getcwd())
metadatafull = query_for_metadata_by_science_type(os.path.join(parent_dir, 'metadata', 'Active_galaxies_metadata.csv'))

Available science keywords:
0: Active Galactic Nuclei (AGN)/Quasars (QSO)
1: Astrochemistry
2: Asymptotic Giant Branch (AGB) stars
3: Black holes
4: Brown dwarfs
5: Cataclysmic stars
6: Cosmic Microwave Background (CMB)/Sunyaev-Zel'dovich Effect (SZE)
7: Damped Lyman Alpha (DLA) systems
8: Debris disks
9: Disks around high-mass stars
10: Disks around low-mass stars
11: Dwarf/metal-poor galaxies
12: Early-type galaxies
13: Evolved stars - Chemistry
14: Evolved stars - Shaping/physical structure
15: Exoplanets
16: Galactic centres/nuclei
17: Galaxy Clusters
18: Galaxy chemistry
19: Galaxy groups and clusters
20: Galaxy structure & evolution
21: Gamma Ray Bursts (GRB)
22: Giant Molecular Clouds (GMC) properties
23: Gravitational lenses
24: HII regions
25: High-mass star formation
26: High-z Active Galactic Nuclei (AGN)
27: Hypergiants
28: Infra-Red Dark Clouds (IRDC)
29: Inter-Stellar Medium (ISM)/Molecular clouds
30: Intermediate-mass star formation
31: Low-mass star formation
32: Lumino

In [21]:
def query_for_metadata_by_science_type2(path, service_url: str = "https://almascience.eso.org/tap"):
    service = pyvo.dal.TAPService(service_url)
    science_keywords, scientific_categories = get_science_types(service)
    print('Available science keywords:')
    for i in range(len(science_keywords)):
        print(f'{i}: {science_keywords[i]}')
    print('Available scientific categories:')
    for i in range(len(scientific_categories)):
        print(f'{i}: {scientific_categories[i]}')
    
    res= []
    for science_keyword_number in range(len(science_keywords)):
        for scientific_category_number in range(len(scientific_categories)):
            for band in range(1,11):
                science_keyword= str(science_keyword_number)
                scientific_category = str(scientific_category_number)
                band = str(band)
                
                df = query_by_science_type(service, [science_keywords[science_keyword_number]], 
                                           [scientific_categories[scientific_category_number]], [band])
                df = df.drop_duplicates(subset='member_ous_uid')

                res.append(df)

    df = pd.concat(res, ignore_index=True)
    # Define a dictionary to map existing column names to new names with unit initials
    rename_columns = {
    'target_name': 'ALMA_source_name',

    }
    # Rename the columns in the DataFrame
    df.rename(columns=rename_columns, inplace=True)
    database = df[['ALMA_source_name']]
    database.to_csv(path, index=False)
    return database

In [22]:
metadataciclo = query_for_metadata_by_science_type2(os.path.join(parent_dir, 'metadata', 'Active_galaxies_metadata1.csv'))

Available science keywords:
0: Active Galactic Nuclei (AGN)/Quasars (QSO)
1: Astrochemistry
2: Asymptotic Giant Branch (AGB) stars
3: Black holes
4: Brown dwarfs
5: Cataclysmic stars
6: Cosmic Microwave Background (CMB)/Sunyaev-Zel'dovich Effect (SZE)
7: Damped Lyman Alpha (DLA) systems
8: Debris disks
9: Disks around high-mass stars
10: Disks around low-mass stars
11: Dwarf/metal-poor galaxies
12: Early-type galaxies
13: Evolved stars - Chemistry
14: Evolved stars - Shaping/physical structure
15: Exoplanets
16: Galactic centres/nuclei
17: Galaxy Clusters
18: Galaxy chemistry
19: Galaxy groups and clusters
20: Galaxy structure & evolution
21: Gamma Ray Bursts (GRB)
22: Giant Molecular Clouds (GMC) properties
23: Gravitational lenses
24: HII regions
25: High-mass star formation
26: High-z Active Galactic Nuclei (AGN)
27: Hypergiants
28: Infra-Red Dark Clouds (IRDC)
29: Inter-Stellar Medium (ISM)/Molecular clouds
30: Intermediate-mass star formation
31: Low-mass star formation
32: Lumino

DALQueryError: ADQL syntax error: Encountered " <S_IDENTIFIER> "dovich "" at line 4, column 88.
Was expecting one of:
    <EOF> 
    "GROUP" ...
    "LIMIT" ...
    "ORDER" ...
    "OFFSET" ...
    "HAVING" ...
    "ESCAPE" ...
    ";" ...
    "ESCAPE" ...
    "GROUP" ...
    "HAVING" ...
    "ORDER" ...
    "LIMIT" ...
    "OFFSET" ...
    "LIMIT" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...
    "ESCAPE" ...

In [None]:
metadatafull=metadatafull.sort_values(by=list(metadatafull.columns))
metadataciclo=metadataciclo.sort_values(by=list(metadataciclo.columns))

if metadatafull.equals(metadataciclo):
    print("I DataFrame sono uguali.")
else:
    print("I DataFrame non sono uguali.")