In [1]:
# Copyright 2023 resspect software
# Author: Emille E. O. Ishida
#
# created on 2 March 2023
#
# Licensed under the MIT License;
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://opensource.org/license/mit/
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from resspect import get_snpcc_metric

In [3]:
# User-tunable settings.
# All three values are used to build the names of the files read and written
# below; `strategy` additionally selects the directory layout of the queried
# files ('RandomSampling' has no per-batch sub-directory).
strategy = "UncSampling"
train = 20
batch = 1

In [4]:
# Read the queried objects and assemble the training and validation samples.
#
# Produces: data_queried, features_pool, flag_queried, data_train,
# train_labels, data_validation, validation_labels.
learn_loop_dir = '/media/RESSPECT/data/PLAsTiCC/for_pipeline/DDF/learn_loop_results/'
features_dir = '/media/RESSPECT/data/PLAsTiCC/for_pipeline/DDF/features/'

# read queried objects
# The file name is the same for every strategy; only the directory differs:
# 'RandomSampling' files sit directly under queried/<strategy>/, every other
# strategy has an additional batch<batch>/ level.
queried_name = 'queried_' + strategy + '_' + str(train) + '_batch' + str(batch) + '.csv'
if strategy == 'RandomSampling':
    queried_dir = learn_loop_dir + 'queried/' + strategy + '/'
else:
    queried_dir = learn_loop_dir + 'queried/' + strategy + '/batch' + str(batch) + '/'
fname_queried = queried_dir + queried_name

data_queried = pd.read_csv(fname_queried)

# make sure to get features corresponding to full light curves
fname_pool = features_dir + 'PLAsTiCC_Bazin_pool.csv'
features_pool = pd.read_csv(fname_pool)

# Boolean mask over the pool rows whose 'id' was queried. `isin` performs the
# membership test in one vectorized pass instead of scanning the queried ids
# once per pool row in a Python loop.
flag_queried = features_pool['id'].isin(data_queried['id']).values
# NOTE(review): rows of data_train keep the order of the pool file, not the
# order in which the objects were queried -- relevant for any positional
# slicing of data_train further down; confirm this is intended.
data_train = features_pool[flag_queried]
train_labels = data_train['type'].values == 'Ia'

# read validation sample
data_validation = pd.read_csv(features_dir + 'PLAsTiCC_Bazin_validation.csv')
validation_labels = data_validation['type'].values == 'Ia'

In [5]:
# train classifier
# Fit a random forest on the queried objects and classify the validation sample.

# Number of leading training rows used for the fit.
# NOTE(review): presumably one query per day over one year (the output file
# written at the end of this notebook is tagged '_1year') -- confirm.
n_train_rows = 365

# Columns from this position onward are passed to the classifier.
# NOTE(review): assumes the first five columns hold metadata such as 'id' and
# 'type' rather than features -- confirm against the feature files.
first_feature_col = 5

# Fixed seed so that a fresh "Restart & Run All" builds the same forest.
seed = 42

clf = RandomForestClassifier(n_estimators=1000, random_state=seed)
clf.fit(data_train.values[:n_train_rows, first_feature_col:],
        train_labels[:n_train_rows])
predictions = clf.predict(data_validation.values[:, first_feature_col:])

In [6]:
# Number of validation objects classified as Ia (counts the True predictions).
sum(predictions)

0

In [7]:
# Score the predictions against the true validation labels.
# The recorded output shows a pair: a list of metric names
# (accuracy, efficiency, purity, fom) and a list with the matching values.
get_snpcc_metric(predictions, validation_labels)

(['accuracy', 'efficiency', 'purity', 'fom'], [0.15794621026894864, 0.0, 0, 0])

In [8]:
# save sample for cosmology
# Keep the rows of the fitres table whose 'CID' matches the 'id' of a
# validation object classified as Ia.
fname_fitres = '/media/RESSPECT/data/PLAsTiCC/for_pipeline/DDF/initial_samples/DDF_validation_fitres.csv'
data_fitres = pd.read_csv(fname_fitres, comment='#')

# Select the predicted-Ia ids once, up front; building this selection inside
# the membership loop would re-filter the validation table for every fitres row.
photo_ia_ids = data_validation.loc[predictions, 'id']

# Vectorized membership test; yields an all-False mask when nothing was
# classified as Ia.
photo_ia_flag = data_fitres['CID'].isin(photo_ia_ids).values
data_photo_ia = data_fitres[photo_ia_flag]

In [9]:
# (rows, columns) of the selected photometric Ia sample.
data_photo_ia.shape

(0, 111)

In [10]:
# Write the selected photometric Ia sample to disk for the cosmology step.
# The file name encodes the strategy, the `train` value and the batch size.
fname_cosmo = ''.join([
    '/media/RESSPECT/data/PLAsTiCC/for_pipeline/DDF/learn_loop_results/',
    'cosmo_samples/',
    strategy, '_', str(train), '_batch', str(batch), '_1year.csv',
])
data_photo_ia.to_csv(fname_cosmo)