In [4]:
import pyvo as vo
import pandas as pd
import numpy as np
import os
from astropy.time import Time
from astropy import units as u
from tqdm import tqdm, trange
from math import ceil
from astropy.time import TimeDelta
import time
import threading
import multiprocessing

In [48]:
tap_service = vo.dal.TAPService("https://api.skymapper.nci.org.au/public/tap/")   
query = f"""SELECT image_id, "date",exp_time FROM dr2.images"""
tap_results = tap_service.search(query, timeout = 600)
df = pd.DataFrame(tap_results) 
df = df.astype({"image_id": str}, errors='raise') 
df.rename(columns = {'image_id':'exposure_id'}, inplace = True)
df.rename(columns = {'date':'exposure_mjd_start'}, inplace = True)
df.rename(columns = {'exp_time':'exposure_duration'}, inplace = True)
DATA_DIR = "/epyc/projects/adam_datasets/skyMapper_dr2"
file_name = os.path.join(DATA_DIR, f"Exposuretime.h5")
df.to_hdf(path_or_buf= file_name,index=False,append=False,key='data',format = 'table')  

In [53]:
def join_date(start,end):
    datedf = pd.read_hdf("/epyc/projects/adam_datasets/skyMapper_dr2/Exposuretime.h5","data", mode = 'r')
    for index in range(start,end):
        date = Time(index, format='mjd') #Timeobject
        date = date.to_value('iso',subfmt='date') #String formate 2014-03-20
        DATA_DIR = "/epyc/projects/adam_datasets/skyMapper_dr2/data"
        file_name = os.path.join(DATA_DIR, f"dr2_observations_{date}.h5")
        if(not os.path.isfile(file_name)):
            continue

        store = pd.read_hdf(file_name, "data",mode = 'r')
        nonNUll = store.loc[store['obj_id'] != 0]
        nonNUll = pd.merge(nonNUll, datedf, on = 'exposure_id', how='left')
        nonNUll['exposure_mjd_mid'] = nonNUll['mjd_utc']
        DATA_DIR = "/epyc/projects/adam_datasets/skyMapper_dr2/data_new"
        file_name = os.path.join(DATA_DIR, f"dr2_observations_{date}.h5")
        nonNUll.to_hdf(path_or_buf= file_name,index=False,append=True,key='data',format = 'table',min_itemsize={ 'obs_id' : 35 })

In [54]:
#56730,58192
jobs = []
Slice = ceil ((58192-56730) / 20)
for i in range(20):
	process = multiprocessing.Process(target=join_date, args=(56730+Slice * i, 56730+Slice * (i+1)))
	jobs.append(process)

for j in jobs: 
	j.start()

for j in jobs:
	j.join()

In [74]:
def to_csv(start,end):
    for index in range(start,end):
        date = Time(index, format='mjd') #Timeobject
        date = date.to_value('iso',subfmt='date') #String formate 2014-03-20
        DATA_DIR = "/epyc/projects/adam_datasets/skyMapper_dr2/data_new"
        file_name = os.path.join(DATA_DIR, f"dr2_observations_{date}.h5")
        if(not os.path.isfile(file_name)):
            continue

        
        store = pd.read_hdf(file_name, "data",mode = 'r')
        store.rename(columns = {'mjd_utc':'mjd'}, inplace = True)
        store = store[(store['mag'].notnull())]
        store = store.loc[:, ~store.columns.str.contains('^Unnamed')]
        DATA_DIR = "/epyc/projects/adam_datasets/skyMapper_dr2/data_csv"
        file_name = os.path.join(DATA_DIR, f"dr2_observations_{date}.csv")
        store.to_csv(path_or_buf= file_name)

In [None]:
#56730,58192
jobs = []
Slice = ceil((58192-56730) / 20)
for i in range(20):
	process = multiprocessing.Process(target=to_csv, args=(56730+Slice * i, 56730+Slice * (i+1)))
	jobs.append(process)

for j in jobs: 
	j.start()

for j in jobs:
	j.join()

In [56]:
store = pd.read_hdf("/epyc/projects/adam_datasets/skyMapper_dr2/data/dr2_observations_2016-09-04.h5","data", mode = 'r')

In [57]:
DATA_DIR = "/epyc/projects/adam_datasets/skyMapper_dr2/data_csv"
file_name = os.path.join(DATA_DIR, f"dr2_observations.csv")
store.to_csv(path_or_buf= file_name)

In [39]:
date = pd.read_hdf("/epyc/projects/adam_datasets/skyMapper_dr2/Exposuretime.h5","data", mode = 'r')

In [30]:
date = date.astype({"image_id": str}, errors='raise') 
date.rename(columns = {'image_id':'exposure_id'}, inplace = True)

In [7]:
nullobject = store.loc[store['obj_id'] != 0]

In [31]:
pd.merge(nullobject, date, on = 'exposure_id', how='left')

Unnamed: 0,dec,dec_sigma,exposure_id,filter,mag,mag_sigma,mjd_utc,obj_id,observatory_code,ra,ra_sigma,obs_id,date,exp_time
0,-45.604924,0.000010,20160904084540,i,18.432289,0.0546,57635.365648,361539264,Q55,265.913931,0.000010,361539264320160904084540,57635.365069,100.0
1,-45.604664,0.000040,20160904084540,i,18.620745,0.2667,57635.365648,361539268,Q55,265.920774,0.000009,361539268320160904084540,57635.365069,100.0
2,-45.606103,0.000010,20160904084540,i,18.365191,0.0250,57635.365648,361539276,Q55,265.927916,0.000013,361539276320160904084540,57635.365069,100.0
3,-45.603884,0.000010,20160904084540,i,18.124685,0.1390,57635.365648,361539277,Q55,265.930351,0.000010,361539277320160904084540,57635.365069,100.0
4,-45.605915,0.000010,20160904084540,i,18.647169,0.0142,57635.365648,361539278,Q55,265.931211,0.000016,361539278320160904084540,57635.365069,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3622008,-52.999064,0.000016,20160904190058,i,15.187358,0.0688,57635.792940,471670370,Q55,100.402363,0.000017,471670370320160904190058,57635.792361,100.0
3622009,-53.014990,0.000018,20160904190058,i,19.226788,0.0171,57635.792940,471670372,Q55,100.395947,0.000033,471670372320160904190058,57635.792361,100.0
3622010,-53.012425,0.000007,20160904190058,i,18.257359,0.0071,57635.792940,471670373,Q55,100.385799,0.000007,471670373320160904190058,57635.792361,100.0
3622011,-53.004036,0.000007,20160904190058,i,19.111158,0.0194,57635.792940,471670374,Q55,100.396547,0.000006,471670374320160904190058,57635.792361,100.0


In [40]:
store = pd.read_hdf("/epyc/projects/adam_datasets/skyMapper_dr2/data/dr2_observations_2016-09-04.h5","data",mode = 'r')
nonNUll = store.loc[store['obj_id'] != 0]
nonNUll = pd.merge(nonNUll, date, on = 'exposure_id', how='left')
nonNUll['exposure_mjd_mid'] = nonNUll['mjd_utc']
DATA_DIR = "/epyc/projects/adam_datasets/skyMapper_dr2/data_new"
file_name = os.path.join(DATA_DIR, f"dr2_observations_2016-09-04.h5")
nonNUll.to_hdf(path_or_buf= file_name,index=False,append=True,key='data',format = 'table',min_itemsize={ 'obs_id' : 35 })

In [None]:
nonNUll

python /epyc/ssd/users/yyang35/precovery/precovery/ingest/index_observations.py data_csv database_new SKYMAPPER_DR2 --nside 32 --cpu_count 20 --dataset_name "SkyMapper Southern Sky Survey (DR2)" --reference_doi https://doi.org/10.3847/1538-3881/abd6e1 --documentation_url https://skymapper.anu.edu.au/data-release/dr2/ --sia_url https://api.skymapper.nci.org.au/aus/siap/dr3/query? 

In [63]:
store = pd.read_csv("/epyc/projects/adam_datasets/skyMapper_dr2/data_csv/dr2_observations_2016-09-04.csv")

In [69]:
store = store.loc[:, ~store.columns.str.contains('^Unnamed')]

In [70]:
store

Unnamed: 0,dec,dec_sigma,exposure_id,filter,mag,mag_sigma,mjd,obj_id,observatory_code,ra,ra_sigma,obs_id,exposure_mjd_start,exposure_duration,exposure_mjd_mid
0,-45.604924,0.000010,20160904084540,i,18.432290,0.0546,57635.365648,361539264,Q55,265.913931,0.000010,361539264320160904084540,57635.365069,100.0,57635.365648
1,-45.604664,0.000040,20160904084540,i,18.620745,0.2667,57635.365648,361539268,Q55,265.920774,0.000009,361539268320160904084540,57635.365069,100.0,57635.365648
2,-45.606103,0.000010,20160904084540,i,18.365190,0.0250,57635.365648,361539276,Q55,265.927916,0.000013,361539276320160904084540,57635.365069,100.0,57635.365648
3,-45.603884,0.000010,20160904084540,i,18.124685,0.1390,57635.365648,361539277,Q55,265.930351,0.000010,361539277320160904084540,57635.365069,100.0,57635.365648
4,-45.605915,0.000010,20160904084540,i,18.647170,0.0142,57635.365648,361539278,Q55,265.931211,0.000016,361539278320160904084540,57635.365069,100.0,57635.365648
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3622008,-52.999064,0.000016,20160904190058,i,15.187358,0.0688,57635.792940,471670370,Q55,100.402363,0.000017,471670370320160904190058,57635.792361,100.0,57635.792940
3622009,-53.014990,0.000018,20160904190058,i,19.226788,0.0171,57635.792940,471670372,Q55,100.395947,0.000033,471670372320160904190058,57635.792361,100.0,57635.792940
3622010,-53.012425,0.000007,20160904190058,i,18.257359,0.0071,57635.792940,471670373,Q55,100.385799,0.000007,471670373320160904190058,57635.792361,100.0,57635.792940
3622011,-53.004036,0.000007,20160904190058,i,19.111158,0.0194,57635.792940,471670374,Q55,100.396547,0.000006,471670374320160904190058,57635.792361,100.0,57635.792940


In [17]:
store.dtypes

Unnamed: 0              int64
dec                   float64
dec_sigma             float64
exposure_id             int64
filter                 object
mag                   float64
mag_sigma             float64
mjd                   float64
obj_id                  int64
observatory_code       object
ra                    float64
ra_sigma              float64
obs_id                 object
exposure_mjd_start    float64
exposure_duration     float64
exposure_mjd_mid      float64
dtype: object

In [36]:
store[(store['mag'].isnull())]

Unnamed: 0,dec,dec_sigma,exposure_id,filter,mag,mag_sigma,mjd_utc,obj_id,observatory_code,ra,ra_sigma,obs_id
3480990,-3.447088,0.0,20160904160052,g,,,57635.66787,0,Q55,43.930887,0.0,3480990#3615132261720160904084540


In [44]:
store = pd.read_hdf("/epyc/projects/adam_datasets/skyMapper_dr2/data/dr2_observations_2016-09-04.h5","data",mode = 'r')

In [47]:
store.loc[store['mag'] == None]

Unnamed: 0,dec,dec_sigma,exposure_id,filter,mag,mag_sigma,mjd_utc,obj_id,observatory_code,ra,ra_sigma,obs_id


In [55]:
store

Unnamed: 0,dec,dec_sigma,exposure_id,filter,mag,mag_sigma,mjd_utc,obj_id,observatory_code,ra,ra_sigma,obs_id
0,-45.604924,0.000010,20160904084540,i,18.432289,0.0546,57635.365648,361539264,Q55,265.913931,0.000010,361539264320160904084540
1,-45.604664,0.000040,20160904084540,i,18.620745,0.2667,57635.365648,361539268,Q55,265.920774,0.000009,361539268320160904084540
2,-45.606103,0.000010,20160904084540,i,18.365191,0.0250,57635.365648,361539276,Q55,265.927916,0.000013,361539276320160904084540
3,-45.603884,0.000010,20160904084540,i,18.124685,0.1390,57635.365648,361539277,Q55,265.930351,0.000010,361539277320160904084540
4,-45.605915,0.000010,20160904084540,i,18.647169,0.0142,57635.365648,361539278,Q55,265.931211,0.000016,361539278320160904084540
...,...,...,...,...,...,...,...,...,...,...,...,...
3641092,-52.999064,0.000016,20160904190058,i,15.187358,0.0688,57635.792940,471670370,Q55,100.402363,0.000017,471670370320160904190058
3641093,-53.014990,0.000018,20160904190058,i,19.226788,0.0171,57635.792940,471670372,Q55,100.395947,0.000033,471670372320160904190058
3641094,-53.012425,0.000007,20160904190058,i,18.257359,0.0071,57635.792940,471670373,Q55,100.385799,0.000007,471670373320160904190058
3641095,-53.004036,0.000007,20160904190058,i,19.111158,0.0194,57635.792940,471670374,Q55,100.396547,0.000006,471670374320160904190058


In [62]:
store[store['mag'].isnull()]

Unnamed: 0,dec,dec_sigma,exposure_id,filter,mag,mag_sigma,mjd_utc,obj_id,observatory_code,ra,ra_sigma,obs_id


In [61]:
store = store[(store['mag'].notnull())]