# **Installations**

# **Calling the ALMA Database**

In [1]:
import numpy as np
from astropy.table import Table
import pyvo
import sys
import matplotlib.pyplot as plt
import pandas as pd
import sklearn.cluster
import sys
import os
import pandas as pd
# Resolve the notebook's working directory and its parent directory.
current_path = os.getcwd()
parent_dir = os.path.join(current_path, "..")
print("Current working directory:", current_path)
print("Path to the parent directory:",parent_dir)
# Put the parent directory on the import path so the `import simulator`
# statement that follows can be resolved from there.
sys.path.append(parent_dir)
import simulator as sm
import numpy as np
import astropy.units as U
from casatasks import exportfits, simobserve, tclean, gaincal, applycal
from casatools import table
from casatools import simulator as casa_simulator
import random
import shutil

service = pyvo.dal.TAPService("https://almascience.eso.org/tap")      # for the EU ALMA TAP service

# service = pyvo.dal.TAPService("https://almascience.nao.ac.jp/tap")  # for the EA ALMA TAP service
# service = pyvo.dal.TAPService("https://almascience.nrao.edu/tap")   # for the NA ALMA TAP service

Current working directory: /home/astro/Documents/GitHub/ALMASim/experimental
Path to the parent directory: /home/astro/Documents/GitHub/ALMASim/experimental/..


In [2]:
# Obtain the metadata table from the project's simulator helper, handing it the
# path <parent_dir>/metadata/Active_galaxies_metadata.csv.
# NOTE(review): the recorded output lists the available science keywords, so the
# helper appears to be interactive — confirm before running unattended.
metadata = sm.query_for_metadata_by_science_type(os.path.join(parent_dir, 'metadata', 'Active_galaxies_metadata.csv'))

Available science keywords:
0: Lyman Break Galaxies (LBG), Sub-mm Galaxies (SMG)
1: The Sun
2: Astrochemistry
3: High-z Active Galactic Nuclei (AGN), Galaxy structure & evolution
4: Outflows, jets, feedback
5: Active Galactic Nuclei (AGN)/Quasars (QSO), Outflows, jets, feedback
6: Exo-planets
7: Outflows, jets, feedback, Galactic centres/nuclei
8: Active Galactic Nuclei (AGN)/Quasars (QSO), Galactic centres/nuclei
9: Spiral galaxies
10: Active Galactic Nuclei (AGN)/Quasars (QSO)
11: Disks around low-mass stars
12: Starburst galaxies, Gravitational lenses
13: Starburst galaxies, Galaxy structure & evolution
14: Starburst galaxies
15: Main sequence stars
16: Low-mass star formation
17: Damped Lyman Alpha (DLA) systems, Galaxy structure & evolution
18: Disks around low-mass stars, Exo-planets
19: Inter-Stellar Medium (ISM)/Molecular clouds
20: Inter-Stellar Medium (ISM)/Molecular clouds, Photon-Dominated Regions (PDR)/X-Ray Dominated Regions (XDR)
21: Brown dwarfs
22: Starbursts, star for

In [3]:
# Summarise the metadata frame: index range, column names, non-null counts, dtypes.
metadata.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1252 entries, 0 to 22478
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   ALMA_source_name         1252 non-null   object 
 1   Band                     1252 non-null   object 
 2   PWV                      1252 non-null   float32
 3   SB_name                  1252 non-null   object 
 4   Vel.res.                 1252 non-null   float64
 5   Ang.res.                 1252 non-null   float64
 6   RA                       1252 non-null   float64
 7   Dec                      1252 non-null   float64
 8   FOV                      1252 non-null   float64
 9   Int.Time                 1252 non-null   float64
 10  Total.Time               1252 non-null   float64
 11  Cont_sens_mJybeam        1252 non-null   float64
 12  Line_sens_10kms_mJybeam  1252 non-null   float64
 13  Obs.date                 1252 non-null   object 
 14  Bandwidth                125

# **Navigating the ALMA Database Schema**

In [4]:
def list_table_columns(service, table_name):
    """List all column names for a given table in the database.

    Parameters:
    service (pyvo.dal.TAPService): A TAPService instance for querying the database.
    table_name (str): The name of the table to list columns for.

    Returns:
    list: A list of column names in the specified table. If the query fails,
        the error is printed and an empty list is returned instead.
    """
    # Double any single quote so the value cannot terminate the ADQL string
    # literal early and change the meaning of the query.
    safe_table_name = str(table_name).replace("'", "''")
    query = f"""
        SELECT column_name
        FROM TAP_SCHEMA.columns
        WHERE table_name = '{safe_table_name}'
    """

    try:
        result = service.search(query).to_table()
        return result['column_name'].tolist()
    except Exception as e:
        # Deliberately best-effort: this is an exploration helper, so a failed
        # lookup is reported and yields an empty list rather than a traceback.
        print(f"An error occurred: {e}")
        return []

# Example usage:
# Connect to the EU ALMA TAP endpoint and print the column names of ivoa.obscore.
service = pyvo.dal.TAPService("https://almascience.eso.org/tap")
column_names = list_table_columns(service, 'ivoa.obscore')
print(column_names)

['access_estsize', 'access_format', 'access_url', 'antenna_arrays', 'asdm_uid', 'authors', 'band_list', 'bandwidth', 'bib_reference', 'calib_level', 'collections', 'cont_sensitivity_bandwidth', 'data_rights', 'dataproduct_type', 'em_max', 'em_min', 'em_res_power', 'em_resolution', 'em_xel', 'facility_name', 'first_author', 'frequency', 'frequency_support', 'gal_latitude', 'gal_longitude', 'group_ous_uid', 'instrument_name', 'is_mosaic', 'lastModified', 'member_ous_uid', 'o_ucd', 'obs_collection', 'obs_creator_name', 'obs_id', 'obs_publisher_did', 'obs_release_date', 'obs_title', 'pi_name', 'pi_userid', 'pol_states', 'pol_xel', 'proposal_abstract', 'proposal_authors', 'proposal_id', 'pub_abstract', 'pub_title', 'publication_year', 'pwv', 'qa2_passed', 's_dec', 's_fov', 's_ra', 's_region', 's_resolution', 's_xel1', 's_xel2', 'scan_intent', 'schedblock_name', 'science_keyword', 'science_observation', 'scientific_category', 'sensitivity_10kms', 'spatial_resolution', 'spatial_scale_max', 't

In [None]:
def get_science_types(sevice):
    """Return the distinct science keywords and scientific categories in ivoa.obscore.

    Only rows with science_observation = 'T' are queried.

    Parameters:
    sevice (pyvo.dal.TAPService): A TAPService instance for querying the database.
        The (misspelled) parameter name is kept so existing callers are unaffected.

    Returns:
    tuple: (science_keywords, scientific_category) — two lists holding the unique
        values of the respective columns, with empty strings removed.
    """
    query = """  
            SELECT science_keyword, scientific_category  
            FROM ivoa.obscore  
            WHERE science_observation = 'T'    
            """
    # Query through the argument rather than a notebook-level `service` global,
    # so the function works on a fresh kernel and with whichever endpoint is passed.
    db = sevice.search(query).to_table().to_pandas()
    science_keywords = [kw for kw in db['science_keyword'].unique() if kw != ""]
    scientific_category = [cat for cat in db['scientific_category'].unique() if cat != ""]
    return science_keywords, scientific_category

def query_by_science_type(service, science_keyword=None, scientific_category=None):
    """Query ivoa.obscore for non-mosaic observations filtered by science keyword and category.

    Each filter is applied as a substring match (LIKE '%<value>%'). A filter left
    as None is treated as "", which turns its pattern into '%%'.

    Parameters:
    service (pyvo.dal.TAPService): A TAPService instance for querying the database.
    science_keyword (str, optional): Text to look for in the science_keyword column.
    scientific_category (str, optional): Text to look for in the scientific_category column.

    Returns:
    pandas.DataFrame: A table of query results.
    """
    if science_keyword is None:
        science_keyword = ""
    if scientific_category is None:
        scientific_category = ""
    print('Querying for science keyword: ', science_keyword)
    print('And scientific categories: ', scientific_category)

    # Double single quotes so a value containing one cannot terminate the ADQL
    # string literal early.
    keyword_literal = str(science_keyword).replace("'", "''")
    category_literal = str(scientific_category).replace("'", "''")

    query = f"""
            SELECT *
            FROM ivoa.obscore
            WHERE science_keyword like '%{keyword_literal}%'
            AND scientific_category like '%{category_literal}%'
            AND is_mosaic = 'F'
            """

    result = service.search(query).to_table().to_pandas()

    return result



In [None]:
# Free-text filters for the archive query; an empty answer is passed on as "",
# which query_by_science_type turns into a match-anything '%%' pattern.
science_keyword = input('Please select the Science Keyword, leave empty for all:' )
scientific_category = input('Please select the Scientific Category, leave empty for all:' )

In [None]:
# Echo the two filter values so the current selection is visible in the output.
print(science_keyword)
print(scientific_category)

In [None]:
# List the keyword and category values available in the archive.
# The category list is bound to `scientific_categories` (plural) so it does not
# overwrite the `scientific_category` string entered by the user, which is later
# passed to query_by_science_type as the category filter.
science_keywords, scientific_categories = get_science_types(service)
print('Available science keywords:')
for i, keyword in enumerate(science_keywords):
    print(f'{i}: {keyword}')
print('Available scientific categories:')
for i, category in enumerate(scientific_categories):
    print(f'{i}: {category}')

In [None]:
# Run the archive query using the keyword and category filters as they are currently set.
database = query_by_science_type(service, science_keyword, scientific_category)


In [None]:
# Check which scientific categories the query returned, then summarise the
# result's columns, non-null counts and dtypes.
print(database['scientific_category'].unique())
database.info()

In [2]:
# Obtain the metadata table from the project's simulator helper, handing it the
# path <parent_dir>/metadata/Active_galaxies_metadata.csv.
# NOTE(review): the output recorded with this cell ends in
# "ValueError: invalid literal for int() with base 10: ''", which suggests the
# helper calls int() on an empty reply — confirm in the simulator module.
metadata = sm.query_for_metadata_by_science_type(os.path.join(parent_dir, 'metadata', 'Active_galaxies_metadata.csv'))

Available science keywords:
0: Lyman Break Galaxies (LBG), Sub-mm Galaxies (SMG)
1: The Sun
2: Astrochemistry
3: High-z Active Galactic Nuclei (AGN), Galaxy structure & evolution
4: Outflows, jets, feedback
5: Active Galactic Nuclei (AGN)/Quasars (QSO), Outflows, jets, feedback
6: Exo-planets
7: Outflows, jets, feedback, Galactic centres/nuclei
8: Active Galactic Nuclei (AGN)/Quasars (QSO), Galactic centres/nuclei
9: Spiral galaxies
10: Active Galactic Nuclei (AGN)/Quasars (QSO)
11: Disks around low-mass stars
12: Starburst galaxies, Gravitational lenses
13: Starburst galaxies, Galaxy structure & evolution
14: Starburst galaxies
15: Main sequence stars
16: Low-mass star formation
17: Damped Lyman Alpha (DLA) systems, Galaxy structure & evolution
18: Disks around low-mass stars, Exo-planets
19: Inter-Stellar Medium (ISM)/Molecular clouds
20: Inter-Stellar Medium (ISM)/Molecular clouds, Photon-Dominated Regions (PDR)/X-Ray Dominated Regions (XDR)
21: Brown dwarfs
22: Starbursts, star for

ValueError: invalid literal for int() with base 10: ''

In [11]:
# Scratch check of the "select by number" flow, using a stand-in category list.
scientific_category_number = input('Please select the Scientific Category by number, leave empty for all:' )
print(scientific_category_number)
print(type(scientific_category_number))
# split() without an argument ignores repeated/leading/trailing blanks and gives
# [] for an empty answer; split(' ') would give [''] and int('') raises ValueError.
scientific_category_number = [int(x) for x in scientific_category_number.split()]
scientific_categories = ['a', 'b', 'c']
if scientific_category_number:
    scientific_category = [scientific_categories[i] for i in scientific_category_number]
else:
    # An empty answer selects every category, as the prompt promises.
    scientific_category = list(scientific_categories)
print(scientific_category)
print(type(scientific_category))

0 1
<class 'str'>
['a', 'b']
<class 'list'>


In [3]:
# Summarise the metadata frame: index range, column names, non-null counts, dtypes.
metadata.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6658 entries, 0 to 136473
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   ALMA_source_name         6658 non-null   object 
 1   Band                     6658 non-null   object 
 2   PWV                      6658 non-null   float32
 3   SB_name                  6658 non-null   object 
 4   Vel.res.                 6658 non-null   float64
 5   Ang.res.                 6658 non-null   float64
 6   RA                       6658 non-null   float64
 7   Dec                      6658 non-null   float64
 8   FOV                      6658 non-null   float64
 9   Int.Time                 6658 non-null   float64
 10  Total.Time               6658 non-null   float64
 11  Cont_sens_mJybeam        6658 non-null   float64
 12  Line_sens_10kms_mJybeam  6658 non-null   float64
 13  Obs.date                 6658 non-null   object 
 14  Bandwidth                66

# **Extracting Metadata from the ALMA Database**

In [None]:
import pandas as pd
import pyvo

def query_observations(service, member_ous_uid, target_name):
    """Query for all non-mosaic observations of a given member OUS UID and target name.

    Every column of ivoa.obscore is selected.

    Parameters:
    service (pyvo.dal.TAPService): A TAPService instance for querying the database.
    member_ous_uid (str): The unique identifier for the member OUS to filter observations by.
    target_name (str): The target name to filter observations by.

    Returns:
    pandas.DataFrame: A table of query results.
    """
    # Double single quotes so values such as "Barnard's Star" stay inside the
    # ADQL string literal instead of terminating it early.
    uid_literal = str(member_ous_uid).replace("'", "''")
    target_literal = str(target_name).replace("'", "''")

    query = f"""
            SELECT *
            FROM ivoa.obscore
            WHERE member_ous_uid = '{uid_literal}'
            AND target_name = '{target_literal}'
            AND is_mosaic = 'F'
            """

    result = service.search(query).to_table().to_pandas()

    return result

def query_all_targets(service, targets):
    """Query observations for all predefined targets and compile the results into a single DataFrame.

    Parameters:
    service (pyvo.dal.TAPService): A TAPService instance for querying the database.
    targets (list of tuples): A list where each tuple contains (target_name, member_ous_uid).

    Returns:
    pandas.DataFrame: A DataFrame containing the results for all queried targets,
        re-indexed from 0. An empty DataFrame is returned when `targets` is empty.
    """
    # One query per (target_name, member_ous_uid) pair, in the order given.
    results = [
        query_observations(service, member_ous_uid, target_name)
        for target_name, member_ous_uid in targets
    ]

    if not results:
        # pd.concat raises ValueError on an empty list; hand back an empty frame instead.
        return pd.DataFrame()

    # Concatenate all DataFrames into a single DataFrame
    return pd.concat(results, ignore_index=True)

def query_for_metadata(targets, path, service_url: str = "https://almascience.eso.org/tap"):
    """Query metadata for all given targets, save it as CSV and return it.

    Parameters:
    targets (list of tuples): A list where each tuple contains (target_name, member_ous_uid).
    path (str): The path of the CSV file the results are written to.
    service_url (str): A TAPService http address for querying the database.

    Returns:
    pandas.DataFrame: The renamed metadata columns for all queried targets, one
        row per member_ous_uid, with 'Obs.date' cut at the first 'T'.
    """
    service = pyvo.dal.TAPService(service_url)
    # Query all targets and compile the results
    df = query_all_targets(service, targets)
    # Keep a single row for each member OUS UID
    df = df.drop_duplicates(subset='member_ous_uid')
    # Map archive column names to the names used in the metadata file.
    # NOTE(review): the metadata frame displayed earlier in this notebook has a
    # 'Vel.res.' column (trailing dot), while this mapping yields 'Vel.res' —
    # confirm which spelling downstream code expects.
    rename_columns = {
        'target_name': 'ALMA_source_name',
        'pwv': 'PWV',
        'schedblock_name': 'SB_name',
        'velocity_resolution': 'Vel.res',
        'spatial_resolution': 'Ang.res.',
        's_ra': 'RA',
        's_dec': 'Dec',
        's_fov': 'FOV',
        't_resolution': 'Int.Time',
        't_max': 'Total.Time',
        'cont_sensitivity_bandwidth': 'Cont_sens_mJybeam',
        'sensitivity_10kms': 'Line_sens_10kms_mJybeam',
        'obs_release_date': 'Obs.date',
        'band_list': 'Band',
    }
    output_columns = ['ALMA_source_name', 'Band', 'PWV', 'SB_name', 'Vel.res', 'Ang.res.', 'RA', 'Dec', 'FOV', 'Int.Time', 'Total.Time', 'Cont_sens_mJybeam', 'Line_sens_10kms_mJybeam', 'Obs.date']
    # Rename without mutating `df`, and take an explicit copy of the selection so
    # the column assignment below writes to an independent frame rather than to
    # a slice of `df` (which triggers pandas' SettingWithCopyWarning).
    database = df.rename(columns=rename_columns)[output_columns].copy()
    # Keep only the part before the 'T' separator; the .str accessor leaves
    # missing values as NaN instead of raising on them.
    database['Obs.date'] = database['Obs.date'].str.split('T').str[0]
    database.to_csv(path, index=False)
    return database
    






In [None]:
service_url = "https://almascience.eso.org/tap"
# Build the output path from parent_dir (set in the first cell) instead of a
# machine-specific absolute path, so the notebook runs from any checkout.
path = os.path.join(parent_dir, "metadata", "QSO_metadata.csv")
# Predefined list of targets and their corresponding member OUS UIDs
targets = [
    ("J0842+1218", "uid://A001/X2fb/X3ec"),
    ("J0842+1218", "uid://A001/X885/X36b"),
    ("J0842+1218", "uid://A001/X1590/X397"),
    ("J0842+1218C1", "uid://A001/X1590/X1ef6"),
    ("SDSS_J092303.53+024739.5", "uid://A001/X12e/X2e6"),
    ("J1319+0950", "uid://A002/X391d0b/X1e"),
    ("J1319+0950", "uid://A002/X7fb989/X18"),
    ("J1319+0950", "uid://A001/X1465/X380c"),
    ("J1319+0950C1", "uid://A001/X1590/X1f06"),
    ("J1319+0950", "uid://A001/X2d20/X15d5"),
    ("ULASJ1319+0950", "uid://A001/X3621/X4412"),
    ("SDSS_J132853.66-022441.6", "uid://A001/X12e/X2ea"),
    ("PJ065-26", "uid://A001/X2fb/X3e4"),
    ("PJ065-26", "uid://A001/X1273/X36c"),
    ("PJ065-26", "uid://A001/X1465/X3830"),
    ("PSO_J167.6415-13.4960", "uid://A001/X2d6/X7d"),
    ("PJ231-20", "uid://A001/X2fb/X440"),
    ("PJ231-20", "uid://A001/X1465/X3810"),
    ("PJ231-20", "uid://A001/X1590/X3a3"),
    ("PJ231-20", "uid://A001/X15aa/X1f7"),
    ("PJ231-20", "uid://A001/X2d1f/X82d"),
    ("PJ231-20", "uid://A001/X2d1f/X829"),
    ("PJ308-21", "uid://A001/X2fb/X418"),
    ("PJ308-21", "uid://A001/X11a4/Xf"),
    ("PJ308-21", "uid://A001/X1465/X382c"),
    ("PJ308-21", "uid://A001/X1590/X38f"),
    ("PJ308-21", "uid://A001/X2d20/X3b14"),
    ("PJ308-21", "uid://A001/X2d20/X3b10"),
    ("WMH5", "uid://A001/X121/X24b"),
    ("WMH13", "uid://A001/X121/X24f"),
    ("WMH_5", "uid://A001/X2d6/X5a"),
    ("J1509-1749", "uid://A001/X2fb/X414"),
    ("J1509-1749", "uid://A001/X1465/X3824"),
    ("J1509-1749", "uid://A001/X1590/X11e2"),
    ("J1509-1749", "uid://A001/X1590/X11c6"),
    ("J1509-1749", "uid://A001/X1590/X11de"),
    ("J1306+0356", "uid://A001/X2fb/X40c"),
    ("J1306+0356", "uid://A001/X1273/X374"),
    ("J1306+0356", "uid://A001/X1590/X39f"),
    ("J1306+0356C1", "uid://A001/X1590/X1f02")
]
# Query the archive for every target and write the result to `path`.
database = query_for_metadata(targets, path, service_url)



# **Variable Info**

# **Cleaning Variable Names**

In [None]:
# Take the first entry of the 'Obs.date' column and display the part that
# precedes any 'T' separator.
dates = database['Obs.date'].values
date = dates[0]
check = date.split('T')[0]
check