In [2]:
#/usr/bin/env python
"""
Purpose of file: Download a subset (set by command line argument) of the rows of GAIA DR3, for the columns containing kinematic information 
suitable for parallelization via job submission system like slurm. 

Outputs:
Writes the velocity data according to ADQL_base_script (defined in script). Default format for output files is csv.

NOTES:
-   On inspecting outputs: Check the number of lines (objects) via "wc -l -c filename" where you
    replace filename, but only works as expected for csv files. In general to check file size in a human
    readable format, type "du -sh filename" and in the output "K" is kilobytes, "M" is megabytes and so on.
    For all files in folder do "du -ha"

-   On unzipping: to unzip a folder recursively and overwrite the originals, use "gunzip -r folder_name"
"""

from astroquery.gaia import Gaia
# import sys
import glob
import sys

# For timing execution
import time
start_time = time.time()

# ----------------- Set job parameters ----------------------

# Define login details (necessary to avoid download limits)
username = 'hsu01' # write your username
password = 'Ch!13902986922'   # write your password
Gaia.login(user=username, password=password)

data_dir = "/ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023" # the folder with lots of storage where we'll save the files


# Add TOP x after "SELECT" below to only get these columns for the first x objects (x a natural number) eg "SELECT TOP 10 ..."
# The indentation isnt necessary in the ADQL script but it is good for readability

ADQL_base_script = '''SELECT TOP %s
                        source_id, ra, dec, l,b, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, 
                        parallax_pmra_corr, parallax_pmdec_corr, pmra_pmdec_corr, ruwe, radial_velocity, 
                        radial_velocity_error, parallax_over_error, phot_g_mean_mag,rv_expected_sig_to_noise
                    FROM gaiadr3.gaia_source
                    WHERE parallax_over_error > 10.0
                    OFFSET %s
                '''

row_lim = 1000000
offset = 1000000 * 0 # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

# Print job info
print("\nStarting query for starting value {} and top {} rows".format(offset,row_lim))

# Define query and job name
query = ADQL_base_script % (row_lim,offset)

jobname = 'DR3_6D_kinematics_{}_to_{}'.format(offset,offset+row_lim) # Sets the output file name too
output_filename = data_dir + jobname + '.csv'

# Check if we already got this data
if len(glob.glob(output_filename))==1:
    print("Cancelling this job, " + output_filename + " already exists")
    print("\nExiting execution...")
    sys.exit()
    
# Run job
job=Gaia.launch_job_async(query, name=jobname, dump_to_file=True, output_format='csv',output_file = output_filename) # fits files are compressed
job.get_results() 

print("Job finished and result saved to " + output_filename + "\n")

# Delete the job from our cache (so we dont hit our quota)
print("Deleting job with id {}".format(job.jobid))
Gaia.remove_jobs([job.jobid])

# time execution
print("\nExecution took %s seconds\n" % (time.time() - start_time))

INFO: Login to gaia TAP server [astroquery.gaia.core]
INFO: OK [astroquery.utils.tap.core]
INFO: Login to gaia data server [astroquery.gaia.core]
INFO: OK [astroquery.utils.tap.core]

Starting query for starting value 0 and top 1000000 rows
Job finished and result saved to /ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023DR3_6D_kinematics_0_to_1000000.csv

Deleting job with id 1701889681134O
INFO: Removed jobs: '['1701889681134O']'. [astroquery.utils.tap.core]

Execution took 297.89169454574585 seconds



In [1]:
#/usr/bin/env python
"""
Purpose of file: Download a subset (set by command line argument) of the rows of GAIA DR3, for the columns containing kinematic information 
suitable for parallelization via job submission system like slurm. 

Outputs:
Writes the velocity data according to ADQL_base_script (defined in script). Default format for output files is csv.

NOTES:
-   On inspecting outputs: Check the number of lines (objects) via "wc -l -c filename" where you
    replace filename, but only works as expected for csv files. In general to check file size in a human
    readable format, type "du -sh filename" and in the output "K" is kilobytes, "M" is megabytes and so on.
    For all files in folder do "du -ha"

-   On unzipping: to unzip a folder recursively and overwrite the originals, use "gunzip -r folder_name"
"""

from astroquery.gaia import Gaia
# import sys
import glob
import sys

# For timing execution
import time
start_time = time.time()

# ----------------- Set job parameters ----------------------

# Define login details (necessary to avoid download limits)
username = 'hsu01' # write your username
password = 'Ch!13902986922'   # write your password
Gaia.login(user=username, password=password)

data_dir = "/ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023" # the folder with lots of storage where we'll save the files


# Add TOP x after "SELECT" below to only get these columns for the first x objects (x a natural number) eg "SELECT TOP 10 ..."
# The indentation isnt necessary in the ADQL script but it is good for readability

ADQL_base_script = '''SELECT TOP %s
                        source_id, ra, dec, l,b, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, 
                        parallax_pmra_corr, parallax_pmdec_corr, pmra_pmdec_corr, ruwe, radial_velocity, 
                        radial_velocity_error, parallax_over_error, phot_g_mean_mag,rv_expected_sig_to_noise
                    FROM gaiadr3.gaia_source
                    WHERE parallax_over_error > 10.0
                    OFFSET %s
                '''

row_lim = 3000000
offset = 3000000 * 1 # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

# Print job info
print("\nStarting query for starting value {} and top {} rows".format(offset,row_lim))

# Define query and job name
query = ADQL_base_script % (row_lim,offset)

jobname = 'DR3_6D_kinematics_{}_to_{}'.format(offset,offset+row_lim) # Sets the output file name too
output_filename = data_dir + jobname + '.csv'

# Check if we already got this data
if len(glob.glob(output_filename))==1:
    print("Cancelling this job, " + output_filename + " already exists")
    print("\nExiting execution...")
    sys.exit()
    
# Run job
job=Gaia.launch_job_async(query, name=jobname, dump_to_file=True, output_format='csv',output_file = output_filename) # fits files are compressed
job.get_results() 

print("Job finished and result saved to " + output_filename + "\n")

# Delete the job from our cache (so we dont hit our quota)
print("Deleting job with id {}".format(job.jobid))
Gaia.remove_jobs([job.jobid])

# time execution
print("\nExecution took %s seconds\n" % (time.time() - start_time))

INFO: Login to gaia TAP server [astroquery.gaia.core]
INFO: OK [astroquery.utils.tap.core]
INFO: Login to gaia data server [astroquery.gaia.core]
INFO: OK [astroquery.utils.tap.core]

Starting query for starting value 3000000 and top 3000000 rows


KeyboardInterrupt: 

In [None]:
#/usr/bin/env python
"""
Purpose of file: Download a subset (set by command line argument) of the rows of GAIA DR3, for the columns containing kinematic information 
suitable for parallelization via job submission system like slurm. 

Outputs:
Writes the velocity data according to ADQL_base_script (defined in script). Default format for output files is csv.

NOTES:
-   On inspecting outputs: Check the number of lines (objects) via "wc -l -c filename" where you
    replace filename, but only works as expected for csv files. In general to check file size in a human
    readable format, type "du -sh filename" and in the output "K" is kilobytes, "M" is megabytes and so on.
    For all files in folder do "du -ha"

-   On unzipping: to unzip a folder recursively and overwrite the originals, use "gunzip -r folder_name"
"""

from astroquery.gaia import Gaia
# import sys
import glob
import sys

# For timing execution
import time
start_time = time.time()

# ----------------- Set job parameters ----------------------

# Define login details (necessary to avoid download limits)
username = 'hsu01' # write your username
password = 'Ch!13902986922'   # write your password
Gaia.login(user=username, password=password)

data_dir = "/ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023" # the folder with lots of storage where we'll save the files


# Add TOP x after "SELECT" below to only get these columns for the first x objects (x a natural number) eg "SELECT TOP 10 ..."
# The indentation isnt necessary in the ADQL script but it is good for readability

ADQL_base_script = '''SELECT TOP %s
                        source_id, ra, dec, l,b, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, 
                        parallax_pmra_corr, parallax_pmdec_corr, pmra_pmdec_corr, ruwe, radial_velocity, 
                        radial_velocity_error, parallax_over_error, phot_g_mean_mag,rv_expected_sig_to_noise
                    FROM gaiadr3.gaia_source
                    WHERE parallax_over_error > 10.0
                    OFFSET %s
                '''

row_lim = 3000000
offset = 3000000 * 2 # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

# Print job info
print("\nStarting query for starting value {} and top {} rows".format(offset,row_lim))

# Define query and job name
query = ADQL_base_script % (row_lim,offset)

jobname = 'DR3_6D_kinematics_{}_to_{}'.format(offset,offset+row_lim) # Sets the output file name too
output_filename = data_dir + jobname + '.csv'

# Check if we already got this data
if len(glob.glob(output_filename))==1:
    print("Cancelling this job, " + output_filename + " already exists")
    print("\nExiting execution...")
    sys.exit()
    
# Run job
job=Gaia.launch_job_async(query, name=jobname, dump_to_file=True, output_format='csv',output_file = output_filename) # fits files are compressed
job.get_results() 

print("Job finished and result saved to " + output_filename + "\n")

# Delete the job from our cache (so we dont hit our quota)
print("Deleting job with id {}".format(job.jobid))
Gaia.remove_jobs([job.jobid])

# time execution
print("\nExecution took %s seconds\n" % (time.time() - start_time))

In [None]:
#/usr/bin/env python
"""
Purpose of file: Download a subset (set by command line argument) of the rows of GAIA DR3, for the columns containing kinematic information 
suitable for parallelization via job submission system like slurm. 

Outputs:
Writes the velocity data according to ADQL_base_script (defined in script). Default format for output files is csv.

NOTES:
-   On inspecting outputs: Check the number of lines (objects) via "wc -l -c filename" where you
    replace filename, but only works as expected for csv files. In general to check file size in a human
    readable format, type "du -sh filename" and in the output "K" is kilobytes, "M" is megabytes and so on.
    For all files in folder do "du -ha"

-   On unzipping: to unzip a folder recursively and overwrite the originals, use "gunzip -r folder_name"
"""

from astroquery.gaia import Gaia
# import sys
import glob
import sys

# For timing execution
import time
start_time = time.time()

# ----------------- Set job parameters ----------------------

# Define login details (necessary to avoid download limits)
username = 'hsu01' # write your username
password = 'Ch!13902986922'   # write your password
Gaia.login(user=username, password=password)

data_dir = "/ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023" # the folder with lots of storage where we'll save the files


# Add TOP x after "SELECT" below to only get these columns for the first x objects (x a natural number) eg "SELECT TOP 10 ..."
# The indentation isnt necessary in the ADQL script but it is good for readability

ADQL_base_script = '''SELECT TOP %s
                        source_id, ra, dec, l,b, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, 
                        parallax_pmra_corr, parallax_pmdec_corr, pmra_pmdec_corr, ruwe, radial_velocity, 
                        radial_velocity_error, parallax_over_error, phot_g_mean_mag,rv_expected_sig_to_noise
                    FROM gaiadr3.gaia_source
                    WHERE parallax_over_error > 10.0
                    OFFSET %s
                '''

row_lim = 3000000
offset = 3000000 * 3 # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

# Print job info
print("\nStarting query for starting value {} and top {} rows".format(offset,row_lim))

# Define query and job name
query = ADQL_base_script % (row_lim,offset)

jobname = 'DR3_6D_kinematics_{}_to_{}'.format(offset,offset+row_lim) # Sets the output file name too
output_filename = data_dir + jobname + '.csv'

# Check if we already got this data
if len(glob.glob(output_filename))==1:
    print("Cancelling this job, " + output_filename + " already exists")
    print("\nExiting execution...")
    sys.exit()
    
# Run job
job=Gaia.launch_job_async(query, name=jobname, dump_to_file=True, output_format='csv',output_file = output_filename) # fits files are compressed
job.get_results() 

print("Job finished and result saved to " + output_filename + "\n")

# Delete the job from our cache (so we dont hit our quota)
print("Deleting job with id {}".format(job.jobid))
Gaia.remove_jobs([job.jobid])

# time execution
print("\nExecution took %s seconds\n" % (time.time() - start_time))

In [None]:
#/usr/bin/env python
"""
Purpose of file: Download a subset (set by command line argument) of the rows of GAIA DR3, for the columns containing kinematic information 
suitable for parallelization via job submission system like slurm. 

Outputs:
Writes the velocity data according to ADQL_base_script (defined in script). Default format for output files is csv.

NOTES:
-   On inspecting outputs: Check the number of lines (objects) via "wc -l -c filename" where you
    replace filename, but only works as expected for csv files. In general to check file size in a human
    readable format, type "du -sh filename" and in the output "K" is kilobytes, "M" is megabytes and so on.
    For all files in folder do "du -ha"

-   On unzipping: to unzip a folder recursively and overwrite the originals, use "gunzip -r folder_name"
"""

from astroquery.gaia import Gaia
# import sys
import glob
import sys

# For timing execution
import time
start_time = time.time()

# ----------------- Set job parameters ----------------------

# Define login details (necessary to avoid download limits)
username = 'hsu01' # write your username
password = 'Ch!13902986922'   # write your password
Gaia.login(user=username, password=password)

data_dir = "/ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023" # the folder with lots of storage where we'll save the files


# Add TOP x after "SELECT" below to only get these columns for the first x objects (x a natural number) eg "SELECT TOP 10 ..."
# The indentation isnt necessary in the ADQL script but it is good for readability

ADQL_base_script = '''SELECT TOP %s
                        source_id, ra, dec, l,b, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, 
                        parallax_pmra_corr, parallax_pmdec_corr, pmra_pmdec_corr, ruwe, radial_velocity, 
                        radial_velocity_error, parallax_over_error, phot_g_mean_mag,rv_expected_sig_to_noise
                    FROM gaiadr3.gaia_source
                    WHERE parallax_over_error > 10.0
                    OFFSET %s
                '''

row_lim = 3000000
offset = 3000000 * 4 # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

# Print job info
print("\nStarting query for starting value {} and top {} rows".format(offset,row_lim))

# Define query and job name
query = ADQL_base_script % (row_lim,offset)

jobname = 'DR3_6D_kinematics_{}_to_{}'.format(offset,offset+row_lim) # Sets the output file name too
output_filename = data_dir + jobname + '.csv'

# Check if we already got this data
if len(glob.glob(output_filename))==1:
    print("Cancelling this job, " + output_filename + " already exists")
    print("\nExiting execution...")
    sys.exit()
    
# Run job
job=Gaia.launch_job_async(query, name=jobname, dump_to_file=True, output_format='csv',output_file = output_filename) # fits files are compressed
job.get_results() 

print("Job finished and result saved to " + output_filename + "\n")

# Delete the job from our cache (so we dont hit our quota)
print("Deleting job with id {}".format(job.jobid))
Gaia.remove_jobs([job.jobid])

# time execution
print("\nExecution took %s seconds\n" % (time.time() - start_time))

In [None]:
#/usr/bin/env python
"""
Purpose of file: Download a subset (set by command line argument) of the rows of GAIA DR3, for the columns containing kinematic information 
suitable for parallelization via job submission system like slurm. 

Outputs:
Writes the velocity data according to ADQL_base_script (defined in script). Default format for output files is csv.

NOTES:
-   On inspecting outputs: Check the number of lines (objects) via "wc -l -c filename" where you
    replace filename, but only works as expected for csv files. In general to check file size in a human
    readable format, type "du -sh filename" and in the output "K" is kilobytes, "M" is megabytes and so on.
    For all files in folder do "du -ha"

-   On unzipping: to unzip a folder recursively and overwrite the originals, use "gunzip -r folder_name"
"""

from astroquery.gaia import Gaia
# import sys
import glob
import sys

# For timing execution
import time
start_time = time.time()

# ----------------- Set job parameters ----------------------

# Define login details (necessary to avoid download limits)
username = 'hsu01' # write your username
password = 'Ch!13902986922'   # write your password
Gaia.login(user=username, password=password)

data_dir = "/ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023" # the folder with lots of storage where we'll save the files


# Add TOP x after "SELECT" below to only get these columns for the first x objects (x a natural number) eg "SELECT TOP 10 ..."
# The indentation isnt necessary in the ADQL script but it is good for readability

ADQL_base_script = '''SELECT TOP %s
                        source_id, ra, dec, l,b, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, 
                        parallax_pmra_corr, parallax_pmdec_corr, pmra_pmdec_corr, ruwe, radial_velocity, 
                        radial_velocity_error, parallax_over_error, phot_g_mean_mag,rv_expected_sig_to_noise
                    FROM gaiadr3.gaia_source
                    WHERE parallax_over_error > 10.0
                    OFFSET %s
                '''

row_lim = 3000000
offset = 3000000 * 5 # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

# Print job info
print("\nStarting query for starting value {} and top {} rows".format(offset,row_lim))

# Define query and job name
query = ADQL_base_script % (row_lim,offset)

jobname = 'DR3_6D_kinematics_{}_to_{}'.format(offset,offset+row_lim) # Sets the output file name too
output_filename = data_dir + jobname + '.csv'

# Check if we already got this data
if len(glob.glob(output_filename))==1:
    print("Cancelling this job, " + output_filename + " already exists")
    print("\nExiting execution...")
    sys.exit()
    
# Run job
job=Gaia.launch_job_async(query, name=jobname, dump_to_file=True, output_format='csv',output_file = output_filename) # fits files are compressed
job.get_results() 

print("Job finished and result saved to " + output_filename + "\n")

# Delete the job from our cache (so we dont hit our quota)
print("Deleting job with id {}".format(job.jobid))
Gaia.remove_jobs([job.jobid])

# time execution
print("\nExecution took %s seconds\n" % (time.time() - start_time))

In [None]:
#/usr/bin/env python
"""
Purpose of file: Download a subset (set by command line argument) of the rows of GAIA DR3, for the columns containing kinematic information 
suitable for parallelization via job submission system like slurm. 

Outputs:
Writes the velocity data according to ADQL_base_script (defined in script). Default format for output files is csv.

NOTES:
-   On inspecting outputs: Check the number of lines (objects) via "wc -l -c filename" where you
    replace filename, but only works as expected for csv files. In general to check file size in a human
    readable format, type "du -sh filename" and in the output "K" is kilobytes, "M" is megabytes and so on.
    For all files in folder do "du -ha"

-   On unzipping: to unzip a folder recursively and overwrite the originals, use "gunzip -r folder_name"
"""

from astroquery.gaia import Gaia
# import sys
import glob
import sys

# For timing execution
import time
start_time = time.time()

# ----------------- Set job parameters ----------------------

# Define login details (necessary to avoid download limits)
username = 'hsu01' # write your username
password = 'Ch!13902986922'   # write your password
Gaia.login(user=username, password=password)

data_dir = "/ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023" # the folder with lots of storage where we'll save the files


# Add TOP x after "SELECT" below to only get these columns for the first x objects (x a natural number) eg "SELECT TOP 10 ..."
# The indentation isnt necessary in the ADQL script but it is good for readability

ADQL_base_script = '''SELECT TOP %s
                        source_id, ra, dec, l,b, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, 
                        parallax_pmra_corr, parallax_pmdec_corr, pmra_pmdec_corr, ruwe, radial_velocity, 
                        radial_velocity_error, parallax_over_error, phot_g_mean_mag,rv_expected_sig_to_noise
                    FROM gaiadr3.gaia_source
                    WHERE parallax_over_error > 10.0
                    OFFSET %s
                '''

row_lim = 3000000
offset = 3000000 * 6 # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

# Print job info
print("\nStarting query for starting value {} and top {} rows".format(offset,row_lim))

# Define query and job name
query = ADQL_base_script % (row_lim,offset)

jobname = 'DR3_6D_kinematics_{}_to_{}'.format(offset,offset+row_lim) # Sets the output file name too
output_filename = data_dir + jobname + '.csv'

# Check if we already got this data
if len(glob.glob(output_filename))==1:
    print("Cancelling this job, " + output_filename + " already exists")
    print("\nExiting execution...")
    sys.exit()
    
# Run job
job=Gaia.launch_job_async(query, name=jobname, dump_to_file=True, output_format='csv',output_file = output_filename) # fits files are compressed
job.get_results() 

print("Job finished and result saved to " + output_filename + "\n")

# Delete the job from our cache (so we dont hit our quota)
print("Deleting job with id {}".format(job.jobid))
Gaia.remove_jobs([job.jobid])

# time execution
print("\nExecution took %s seconds\n" % (time.time() - start_time))

In [None]:
#/usr/bin/env python
"""
Purpose of file: Download a subset (set by command line argument) of the rows of GAIA DR3, for the columns containing kinematic information 
suitable for parallelization via job submission system like slurm. 

Outputs:
Writes the velocity data according to ADQL_base_script (defined in script). Default format for output files is csv.

NOTES:
-   On inspecting outputs: Check the number of lines (objects) via "wc -l -c filename" where you
    replace filename, but only works as expected for csv files. In general to check file size in a human
    readable format, type "du -sh filename" and in the output "K" is kilobytes, "M" is megabytes and so on.
    For all files in folder do "du -ha"

-   On unzipping: to unzip a folder recursively and overwrite the originals, use "gunzip -r folder_name"
"""

from astroquery.gaia import Gaia
# import sys
import glob
import sys

# For timing execution
import time
start_time = time.time()

# ----------------- Set job parameters ----------------------

# Define login details (necessary to avoid download limits)
username = 'hsu01' # write your username
password = 'Ch!13902986922'   # write your password
Gaia.login(user=username, password=password)

data_dir = "/ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023" # the folder with lots of storage where we'll save the files


# Add TOP x after "SELECT" below to only get these columns for the first x objects (x a natural number) eg "SELECT TOP 10 ..."
# The indentation isnt necessary in the ADQL script but it is good for readability

ADQL_base_script = '''SELECT TOP %s
                        source_id, ra, dec, l,b, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, 
                        parallax_pmra_corr, parallax_pmdec_corr, pmra_pmdec_corr, ruwe, radial_velocity, 
                        radial_velocity_error, parallax_over_error, phot_g_mean_mag,rv_expected_sig_to_noise
                    FROM gaiadr3.gaia_source
                    WHERE parallax_over_error > 10.0
                    OFFSET %s
                '''

row_lim = 3000000
offset = 3000000 * 7 # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

# Print job info
print("\nStarting query for starting value {} and top {} rows".format(offset,row_lim))

# Define query and job name
query = ADQL_base_script % (row_lim,offset)

jobname = 'DR3_6D_kinematics_{}_to_{}'.format(offset,offset+row_lim) # Sets the output file name too
output_filename = data_dir + jobname + '.csv'

# Check if we already got this data
if len(glob.glob(output_filename))==1:
    print("Cancelling this job, " + output_filename + " already exists")
    print("\nExiting execution...")
    sys.exit()
    
# Run job
job=Gaia.launch_job_async(query, name=jobname, dump_to_file=True, output_format='csv',output_file = output_filename) # fits files are compressed
job.get_results() 

print("Job finished and result saved to " + output_filename + "\n")

# Delete the job from our cache (so we dont hit our quota)
print("Deleting job with id {}".format(job.jobid))
Gaia.remove_jobs([job.jobid])

# time execution
print("\nExecution took %s seconds\n" % (time.time() - start_time))

In [None]:
#/usr/bin/env python
"""
Purpose of file: Download a subset (set by command line argument) of the rows of GAIA DR3, for the columns containing kinematic information 
suitable for parallelization via job submission system like slurm. 

Outputs:
Writes the velocity data according to ADQL_base_script (defined in script). Default format for output files is csv.

NOTES:
-   On inspecting outputs: Check the number of lines (objects) via "wc -l -c filename" where you
    replace filename, but only works as expected for csv files. In general to check file size in a human
    readable format, type "du -sh filename" and in the output "K" is kilobytes, "M" is megabytes and so on.
    For all files in folder do "du -ha"

-   On unzipping: to unzip a folder recursively and overwrite the originals, use "gunzip -r folder_name"
"""

from astroquery.gaia import Gaia
# import sys
import glob
import sys

# For timing execution
import time
start_time = time.time()

# ----------------- Set job parameters ----------------------

# Define login details (necessary to avoid download limits)
username = 'hsu01' # write your username
password = 'Ch!13902986922'   # write your password
Gaia.login(user=username, password=password)

data_dir = "/ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023" # the folder with lots of storage where we'll save the files


# Add TOP x after "SELECT" below to only get these columns for the first x objects (x a natural number) eg "SELECT TOP 10 ..."
# The indentation isnt necessary in the ADQL script but it is good for readability

ADQL_base_script = '''SELECT TOP %s
                        source_id, ra, dec, l,b, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, 
                        parallax_pmra_corr, parallax_pmdec_corr, pmra_pmdec_corr, ruwe, radial_velocity, 
                        radial_velocity_error, parallax_over_error, phot_g_mean_mag,rv_expected_sig_to_noise
                    FROM gaiadr3.gaia_source
                    WHERE parallax_over_error > 10.0
                    OFFSET %s
                '''

row_lim = 3000000
offset = 3000000 * 8 # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

# Print job info
print("\nStarting query for starting value {} and top {} rows".format(offset,row_lim))

# Define query and job name
query = ADQL_base_script % (row_lim,offset)

jobname = 'DR3_6D_kinematics_{}_to_{}'.format(offset,offset+row_lim) # Sets the output file name too
output_filename = data_dir + jobname + '.csv'

# Check if we already got this data
if len(glob.glob(output_filename))==1:
    print("Cancelling this job, " + output_filename + " already exists")
    print("\nExiting execution...")
    sys.exit()
    
# Run job
job=Gaia.launch_job_async(query, name=jobname, dump_to_file=True, output_format='csv',output_file = output_filename) # fits files are compressed
job.get_results() 

print("Job finished and result saved to " + output_filename + "\n")

# Delete the job from our cache (so we dont hit our quota)
print("Deleting job with id {}".format(job.jobid))
Gaia.remove_jobs([job.jobid])

# time execution
print("\nExecution took %s seconds\n" % (time.time() - start_time))

In [None]:
#/usr/bin/env python
"""
Purpose of file: Download a subset (set by command line argument) of the rows of GAIA DR3, for the columns containing kinematic information 
suitable for parallelization via job submission system like slurm. 

Outputs:
Writes the velocity data according to ADQL_base_script (defined in script). Default format for output files is csv.

NOTES:
-   On inspecting outputs: Check the number of lines (objects) via "wc -l -c filename" where you
    replace filename, but only works as expected for csv files. In general to check file size in a human
    readable format, type "du -sh filename" and in the output "K" is kilobytes, "M" is megabytes and so on.
    For all files in folder do "du -ha"

-   On unzipping: to unzip a folder recursively and overwrite the originals, use "gunzip -r folder_name"
"""

from astroquery.gaia import Gaia
# import sys
import glob
import sys

# For timing execution
import time
start_time = time.time()

# ----------------- Set job parameters ----------------------

# Define login details (necessary to avoid download limits)
username = 'hsu01' # write your username
password = 'Ch!13902986922'   # write your password
Gaia.login(user=username, password=password)

data_dir = "/ocean/projects/phy210068p/hsu1/Ananke_datasets_training/Gaiadr3_data/gaia_download_newest_12062023" # the folder with lots of storage where we'll save the files


# Add TOP x after "SELECT" below to only get these columns for the first x objects (x a natural number) eg "SELECT TOP 10 ..."
# The indentation isnt necessary in the ADQL script but it is good for readability

ADQL_base_script = '''SELECT TOP %s
                        source_id, ra, dec, l,b, parallax, parallax_error, pmra, pmra_error, pmdec, pmdec_error, 
                        parallax_pmra_corr, parallax_pmdec_corr, pmra_pmdec_corr, ruwe, radial_velocity, 
                        radial_velocity_error, parallax_over_error, phot_g_mean_mag,rv_expected_sig_to_noise
                    FROM gaiadr3.gaia_source
                    WHERE parallax_over_error > 10.0
                    OFFSET %s
                '''

row_lim = 3000000
offset = 3000000 * 9 # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

# Print job info
print("\nStarting query for starting value {} and top {} rows".format(offset,row_lim))

# Define query and job name
query = ADQL_base_script % (row_lim,offset)

jobname = 'DR3_6D_kinematics_{}_to_{}'.format(offset,offset+row_lim) # Sets the output file name too
output_filename = data_dir + jobname + '.csv'

# Check if we already got this data
if len(glob.glob(output_filename))==1:
    print("Cancelling this job, " + output_filename + " already exists")
    print("\nExiting execution...")
    sys.exit()
    
# Run job
job=Gaia.launch_job_async(query, name=jobname, dump_to_file=True, output_format='csv',output_file = output_filename) # fits files are compressed
job.get_results() 

print("Job finished and result saved to " + output_filename + "\n")

# Delete the job from our cache (so we dont hit our quota)
print("Deleting job with id {}".format(job.jobid))
Gaia.remove_jobs([job.jobid])

# time execution
print("\nExecution took %s seconds\n" % (time.time() - start_time))