In [11]:
# Copyright 2023 resspect software
# Author: Emille E. O. Ishida
#
# created on 17 January 2023
#
# Licensed MIT License;
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://opensource.org/license/mit/
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [12]:
import pandas as pd
import numpy as np
import progressbar

In [13]:
field = 'DDF'

In [10]:
# separate relevant training sample
fname_train_meta = '/media/RESSPECT/data/PLAsTiCC/PLAsTiCC_zenodo/plasticc_train_metadata.csv'
zenodo_train_meta = pd.read_csv(fname_train_meta)

field_meta_flag = (field == 'DDF') == zenodo_train_meta['ddf_bool']
field_train_meta = zenodo_train_meta[field_meta_flag]
field_train_meta.to_csv('/media/RESSPECT/data/PLAsTiCC/for_pipeline/' + \
                         field + '/initial_samples/' + field + '_train_metadata.csv')

fname_train_lc = '/media/RESSPECT/data/PLAsTiCC/PLAsTiCC_zenodo/plasticc_train_lightcurves.csv.gz'
zenodo_train_lc = pd.read_csv(fname_train_lc)

field_lc_flag = np.array([item in field_train_meta['object_id'].values for item in zenodo_train_lc['object_id'].values])
field_train_lc = zenodo_train_lc[field_lc_flag]
field_train_lc.to_csv('/media/RESSPECT/data/PLAsTiCC/for_pipeline/' + field + '/initial_samples/' + \
                       field + '_train_lightcurves_01.csv')

In [14]:
# read zenodo metadata
fname_test_meta = '/media/RESSPECT/data/PLAsTiCC/PLAsTiCC_zenodo/plasticc_test_metadata.csv'
zenodo_test_meta = pd.read_csv(fname_test_meta)

In [6]:
# read validation sample
fname_val_fitres = '/media/RESSPECT/data/PLAsTiCC/for_pipeline/' + field + '/initial_samples/' + \
                    field + '_validation_fitres.csv'
data_val_fitres = pd.read_csv(fname_val_fitres)
val_fitres_ids = data_val_fitres['CID'].values

# get metadata for validation sample and save it to file
flag_val_meta = np.array([item in val_fitres_ids for item in zenodo_test_meta['object_id'].values])
val_meta = zenodo_test_meta[flag_val_meta]
val_meta.to_csv('/media/RESSPECT/data/PLAsTiCC/for_pipeline/' + field + '/initial_samples/' + \
                field + '_validation_metadata.csv')

In [7]:
# read test sample
fname_test_fitres = '/media/RESSPECT/data/PLAsTiCC/for_pipeline/' + field + '/initial_samples/' + \
                    field + '_test_fitres.csv'

data_test_fitres = pd.read_csv(fname_test_fitres)
test_fitres_ids = data_test_fitres['CID'].values

# get metadata for test sample and save it to file
flag_test_meta = np.array([item in test_fitres_ids for item in zenodo_test_meta['object_id'].values])
test_meta = zenodo_test_meta[flag_test_meta]
test_meta.to_csv('/media/RESSPECT/data/PLAsTiCC/for_pipeline/' + field + '/initial_samples/' + \
                field + '_test_metadata.csv')

In [8]:
# read pool sample
fname_pool_metadata = '/media/RESSPECT/data/PLAsTiCC/for_pipeline/' + \
                      field + '/initial_samples/' + field + '_pool_metadata.csv'
data_pool_meta = pd.read_csv(fname_pool_metadata)

In [9]:
# Separate light curves

val_lc_list = []
test_lc_list = []

for i in progressbar.progressbar(range(1, 12)):
    fname_lc = '/media/RESSPECT/data/PLAsTiCC/PLAsTiCC_zenodo/plasticc_test_lightcurves_' + \
               str(i).zfill(2) + '.csv.gz'
    lc_temp = pd.read_csv(fname_lc)
    
    flag_val_temp = [item in val_meta['object_id'].values for item in lc_temp['object_id'].values]
    
    if sum(flag_val_temp) > 0:
        val_lc = lc_temp[flag_val_temp]
        val_lc.to_csv('/media/RESSPECT/data/PLAsTiCC/for_pipeline/' + field + '/initial_samples/' + \
                field + '_val_lightcurves' + str(i).zfill(2) + '.csv')
        print('Wrote ' + field + '_validation_lightcurves_' + str(i).zfill(2) + '.csv')
        
    flag_test_temp = [item in test_meta['object_id'].values for item in lc_temp['object_id'].values]
    
    if sum(flag_test_temp) > 0:
        test_lc = lc_temp[flag_test_temp]
        test_lc.to_csv('/media/RESSPECT/data/PLAsTiCC/for_pipeline/' + field + '/initial_samples/' + \
                field + '_test_lightcurves_' + str(i).zfill(2) + '.csv')  
        print('Wrote ' + field + '_test_lightcurves_' + str(i).zfill(2) + '.csv')
    
    
    flag_pool_temp = [item in data_pool_meta['object_id'].values for item in lc_temp['object_id'].values]
    
    if sum(flag_pool_temp) > 0:
        lc_temp[flag_pool_temp].to_csv('/media/RESSPECT/data/PLAsTiCC/for_pipeline/' + field + '/initial_samples/' + \
                field + '_pool_lightcurves_' + str(i).zfill(2) + '.csv')
        
        print('Wrote ' + field + '_pool_lightcurves_' + str(i).zfill(2) + '.csv')
        

  0% (0 of 11) |                         | Elapsed Time: 0:00:00 ETA:  --:--:--

Wrote DDF_val_lightcurves01.csv
Wrote DDF_test_lightcurves01.csv


  9% (1 of 11) |##                       | Elapsed Time: 0:10:33 ETA:   1:45:37

Wrote DDF_pool_lightcurves01.csv


100% (11 of 11) |########################| Elapsed Time: 6:57:07 Time:  6:57:07
