## Creating npy arrays for the labels 

In [1]:
import pandas as pd
import numpy as np
from astropy.table import Table

In [2]:
# Read the source catalogues: the LSB galaxy table and the two artifact
# (negative-example) tables. Paths are relative to the notebook's directory.
lsb_data = pd.read_csv(
    filepath_or_buffer='../Datasets_DeepShadows/Datasets/random_LSBGs_all.csv'
)
art_data1 = pd.read_csv(
    filepath_or_buffer='../Datasets_DeepShadows/Datasets/random_negative_all_1.csv'
)
art_data2 = pd.read_csv(
    filepath_or_buffer='../Datasets_DeepShadows/Datasets/random_negative_all_2.csv'
)

In [3]:
# Report the row count of each catalogue loaded above.
print(f"Numner of LSB {len(lsb_data)}")
print(f"Numner of artifacts 1 {len(art_data1)}")
print(f"Numner of artifacts 2 {len(art_data2)}")

Numner of LSB 19996
Numner of artifacts 1 20000
Numner of artifacts 2 20000


In [4]:
# Preview the first five rows of the second artifact catalogue.
art_data2.head(n=5)

Unnamed: 0.1,Unnamed: 0,coadd_id,ra,dec,A_IMAGE,B_IMAGE,mag_auto_g,flux_radius_g,mu_max_g,mu_max_model_g,...,mu_max_r,mu_max_model_r,mu_eff_model_r,mu_mean_model_r,mag_auto_i,flux_radius_i,mu_max_i,mu_max_model_i,mu_eff_model_i,mu_mean_model_i
0,0,316310989.0,40.09074,-4.445404,4.71701,2.354342,21.472155,3.29344,24.441231,24.964153,...,23.634071,24.127546,25.925787,25.228838,20.717335,3.198911,23.218655,23.792841,25.579319,24.88649
1,1,497492000.0,65.921443,-22.396546,6.285618,3.290936,21.466034,2.864123,24.185545,24.437744,...,23.876196,23.950928,25.750076,25.053593,20.791466,2.69054,23.249422,23.526728,25.328304,24.628597
2,2,508340073.0,70.732828,-41.048892,4.047899,1.414522,22.233032,2.719934,23.815443,24.855452,...,23.831644,24.261894,26.044979,25.336315,21.610502,2.131606,23.459972,23.383272,25.165167,24.45982
3,3,503163353.0,71.141644,-26.290225,4.369693,4.113105,21.683304,2.599894,23.9091,23.563255,...,23.640265,23.607225,25.409803,24.710609,20.85494,2.551759,23.435854,23.439079,25.239449,24.540735
4,4,335599317.0,45.137313,-15.279261,2.830348,1.821607,21.584328,3.258073,24.377985,25.106985,...,23.945517,24.284058,26.09033,25.391094,21.133919,3.044972,23.703569,24.384443,26.189703,25.49193


In [5]:
# Read the three baseline splits (training / validation / test).
# Each file is parsed with pandas' default CSV settings.
train_data = pd.read_csv(
    filepath_or_buffer='../Datasets_DeepShadows/Datasets/Baseline_training.csv'
)
val_data = pd.read_csv(
    filepath_or_buffer='../Datasets_DeepShadows/Datasets/Baseline_validation.csv'
)
test_data = pd.read_csv(
    filepath_or_buffer='../Datasets_DeepShadows/Datasets/Baseline_test.csv'
)


In [10]:
# Display the training split as the cell output to inspect its columns.
train_data

Unnamed: 0.1,Unnamed: 0,ra,dec
0,0,26.108572,-6.439318
1,1,45.534353,-37.171315
2,2,42.025593,-11.286475
3,3,11.829991,-24.476106
4,4,16.885239,-19.114691
...,...,...,...
29995,29995,306.935903,-50.728377
29996,29996,95.102734,-47.774723
29997,29997,60.079080,-16.200399
29998,29998,42.634387,-4.760887


In [7]:
# Label each row by looking up its (RA, DEC) pair in the galaxy / artifact catalogues
def assign_labels(dataset, galaxies=None, artifacts=None):
    """Return one label per row of ``dataset`` based on its (ra, dec) pair.

    A row is labelled by exact lookup of its coordinate pair:

    * ``1``      -- the pair occurs as a row of ``galaxies``
    * ``0``      -- otherwise, the pair occurs as a row of ``artifacts``
    * ``np.nan`` -- the pair occurs in neither catalogue

    Parameters
    ----------
    dataset : pandas.DataFrame
        Rows to label; must provide ``'ra'`` and ``'dec'`` columns.
    galaxies : pandas.DataFrame, optional
        Positive catalogue with ``'ra'`` and ``'dec'`` columns. Defaults to the
        notebook-level ``lsb_data``.
    artifacts : pandas.DataFrame, optional
        Negative catalogue with ``'ra'`` and ``'dec'`` columns. Defaults to the
        notebook-level ``art_data2``.

    Returns
    -------
    list
        Labels in the same order as the rows of ``dataset`` (empty list for an
        empty ``dataset``).

    Notes
    -----
    The previous implementation tested ``(ra, dec) in frame[['ra', 'dec']].values``.
    For a NumPy array that expression is ``(array == (ra, dec)).any()``, which
    is true as soon as *either* coordinate matches *any* row, so RA and DEC
    did not have to come from the same catalogue entry. Membership is now
    tested on whole (ra, dec) tuples.

    Matching uses exact float equality, as before; NaN coordinates never match.

    NOTE(review): ``art_data1`` is loaded in this notebook but is not consulted
    here, so rows that only occur in it are labelled NaN -- confirm this is
    intended.
    """
    # Fall back to the notebook-level catalogues when none are passed in.
    if galaxies is None:
        galaxies = lsb_data
    if artifacts is None:
        artifacts = art_data2

    # Build the lookup sets once: O(1) membership per row instead of
    # re-extracting and scanning the whole catalogue for every row.
    galaxy_coords = set(zip(galaxies['ra'].tolist(), galaxies['dec'].tolist()))
    artifact_coords = set(zip(artifacts['ra'].tolist(), artifacts['dec'].tolist()))

    labels = []
    for coord in zip(dataset['ra'].tolist(), dataset['dec'].tolist()):
        if coord in galaxy_coords:
            labels.append(1)  # galaxy
        elif coord in artifact_coords:
            labels.append(0)  # non-galaxy (artifact)
        else:
            labels.append(np.nan)  # coordinates not found in either catalogue
    return labels

In [8]:
# Compute the label list for each split; every list is aligned row-for-row
# with the DataFrame it was derived from.
y_train = assign_labels(dataset=train_data)
y_val = assign_labels(dataset=val_data)
y_test = assign_labels(dataset=test_data)

In [9]:
# Persist each split's labels to its own .npy file.
# The label lists are passed to np.save as array-likes.
np.save(file='../Datasets_DeepShadows/Galaxies_data/y_train.npy', arr=y_train)
np.save(file='../Datasets_DeepShadows/Galaxies_data/y_val.npy', arr=y_val)
np.save(file='../Datasets_DeepShadows/Galaxies_data/y_test.npy', arr=y_test)