# Create subsets of 10,000 spectra each for full-sample runs

## Author(s): Sven Buder (SB, WG4)

### History:
180926 SB Created

In [9]:
import os

import numpy as np
import astropy.io.fits as pyfits

In [15]:
# Read the table stored in extension 1 of the cross-matched catalogue file.
catalogue_file = 'sobject_iraf_53_2MASS_GaiaDR2_WISE_PanSTARRSDR1_BailerJones_K2seis.fits'
sobject_data = pyfits.getdata(catalogue_file, ext=1)

## Apply quality cut: parallax available (finite) and flag_guess <= 8, then keep one entry per sobject_id

In [21]:
# Report how many rows are present before any selection is applied.
print('initial set:           '+str(len(sobject_data)))

# Build the selection mask from its two criteria: a finite parallax value
# and a flag_guess of at most 8.
has_parallax = np.isfinite(sobject_data['parallax'])
flag_ok = sobject_data['flag_guess'] <= 8
quality_cut = np.logical_and(has_parallax, flag_ok)
sobject_data = sobject_data[quality_cut]

# Keep a single row per sobject_id. np.unique returns the index of the first
# occurrence of each distinct value, ordered by the sorted unique values, so
# the table comes out ordered by sobject_id afterwards.
u1, sobject_data_index = np.unique(sobject_data['sobject_id'], return_index=True)
sobject_data = sobject_data[sobject_data_index]

# Report how many rows survive the cut and the de-duplication.
print('set after quality cut: '+str(len(sobject_data)))

initial set:           652799
set after quality cut: 652799


## Create subsets after sorting by effective temperature

In [22]:
# Order the table by the 'teff_guess' field so that each subset written
# afterwards covers a contiguous range of that field.
sort_field = 'teff_guess'
sobject_data = np.sort(sobject_data, order=sort_field)

In [5]:
# Write the sorted table to disk in consecutive chunks of at most SUBSET_SIZE
# rows, one whitespace-separated text file per chunk. Each output row holds
# the chunk name, the sobject_id and the constant tag 'DR3'.
SUBSET_SIZE = 10000
SUBSET_DIR = '10k_subsets'

# np.savetxt does not create missing directories, so make sure the target
# directory exists before the first file is written.
if not os.path.isdir(SUBSET_DIR):
    os.makedirs(SUBSET_DIR)

# Ceiling division with `//` gives an integer on both Python 2 and 3 and,
# unlike `len(...)/SUBSET_SIZE + 1`, does not add an empty trailing subset
# when the number of rows is an exact multiple of SUBSET_SIZE.
n_subsets = (len(sobject_data) + SUBSET_SIZE - 1) // SUBSET_SIZE

for each_subset in range(n_subsets):

    # Slicing clamps at the end of the array, so the last chunk is simply
    # shorter; no explicit upper bound is needed.
    subset = sobject_data[each_subset*SUBSET_SIZE:(each_subset+1)*SUBSET_SIZE]

    subset_name = '10k_'+str(each_subset)+'_lbol'

    # NOTE(review): the column name was truncated to 's' in the original cell
    # (unbalanced brackets); 'sobject_id' is assumed because it is the
    # identifier column used by the de-duplication step -- confirm.
    # zip() is materialised with list() because np.savetxt needs a sequence,
    # not a lazy iterator (which is what zip returns on Python 3).
    rows = list(zip([subset_name]*len(subset),
                    subset['sobject_id'],
                    ['DR3']*len(subset)))

    np.savetxt(SUBSET_DIR+'/GALAH_'+subset_name, rows, fmt='%s')