In [36]:
import numpy as np
import pandas as pd
import astropy.io.fits as pyfits
import matplotlib.pyplot as plt
import glob
from astropy.table import Table 
from spectres import spectres

In [81]:
missing_data=pd.read_csv("/home/praveen/Dropbox/Masters/python-code/galah-file-read-test/data/missing_data_products_all.txt",delim_whitespace=True)

In [82]:
missing_data.head()

Unnamed: 0,Unnamed: 1,survey,data_release,source_name,schema_key
GALAH,Data,Release,3,131116001001192,galah.dr3.spectra.hermes.spectrum_1d
GALAH,Data,Release,3,131116001001279,galah.dr3.spectra.hermes.spectrum_1d
GALAH,Data,Release,3,131118001901243,galah.dr3.spectra.hermes.spectrum_1d
GALAH,Data,Release,3,131118001901332,galah.dr3.spectra.hermes.spectrum_1d
GALAH,Data,Release,3,131118002901190,galah.dr3.spectra.hermes.spectrum_1d


In [83]:
missing_data_np=missing_data["source_name"].to_numpy()

In [84]:
df_missing_data = pd.DataFrame(data=missing_data_np, columns=["sobject_id"])

In [85]:
df_missing_data.head()

Unnamed: 0,sobject_id
0,131116001001192
1,131116001001279
2,131118001901243
3,131118001901332
4,131118002901190


In [86]:
data = Table.read('/home/praveen/Dropbox/Masters/emission-mnras-cotar/data.fits', format='fits')

In [87]:
df_full = data.to_pandas()

In [88]:
df_emiss_full = df_full.loc[df_full['emiss'] == True]

In [89]:
df_emiss_full_ids = df_emiss_full[["sobject_id"]].copy()

In [90]:
df_emiss_full_ids.head()

Unnamed: 0,sobject_id
231,131116000501262
332,131116000501383
420,131116001001192
432,131116001001213
462,131116001001279


In [91]:
df_temp = pd.merge(df_emiss_full_ids, df_missing_data, how='outer', indicator=True)

In [92]:
df_temp.head()

Unnamed: 0,sobject_id,_merge
0,131116000501262,left_only
1,131116000501383,left_only
2,131116001001192,both
3,131116001001213,left_only
4,131116001001279,both


In [93]:
df_final_sobject_ids_temp = df_temp.loc[df_temp['_merge'] == "left_only"]

In [94]:
df_final_sobject_ids_temp.head()

Unnamed: 0,sobject_id,_merge
0,131116000501262,left_only
1,131116000501383,left_only
3,131116001001213,left_only
5,131116001001379,left_only
8,131118002401234,left_only


In [95]:
df1 = df_final_sobject_ids_temp.drop('_merge', 1)

  df1 = df_final_sobject_ids_temp.drop('_merge', 1)


In [96]:
df1.head()

Unnamed: 0,sobject_id
0,131116000501262
1,131116000501383
3,131116001001213
5,131116001001379
8,131118002401234


In [97]:
df_missing_norm = pd.read_csv("/home/praveen/Dropbox/Masters/python-code/galah-file-read-test/data/GALAH_DR3_list_missing_normalized_spectra_v2.csv", header=None) #GALAH objects with missing normalised spectra 
df_missing_norm.columns = ['sobject_id']

In [98]:
df_temp = pd.merge(df1, df_missing_norm, how='outer', indicator=True)

In [99]:
df_temp

Unnamed: 0,sobject_id,_merge
0,131116000501262,left_only
1,131116000501383,left_only
2,131116001001213,left_only
3,131116001001379,left_only
4,131118002401234,left_only
...,...,...
8576,181226003601116,right_only
8577,181226003601199,right_only
8578,181226004101074,right_only
8579,181226004101265,right_only


In [100]:
df2 = df_temp.loc[df_temp['_merge'] == "left_only"].drop(["_merge"], axis=1)

In [101]:
df2

Unnamed: 0,sobject_id
0,131116000501262
1,131116000501383
2,131116001001213
3,131116001001379
4,131118002401234
...,...
8349,190225004801232
8350,190225005301010
8351,190225005301142
8352,190225005301225


In [107]:
df2.to_csv("final_emission_list.csv")

In [111]:
sobject_ids=df2["sobject_id"].to_numpy().tolist()

In [108]:
df3=pd.read_csv("final_emission_list.csv")

In [110]:
df4 = df3[["sobject_id"]].copy()

In [112]:
df4

Unnamed: 0,sobject_id
0,131116000501262
1,131116000501383
2,131116001001213
3,131116001001379
4,131118002401234
...,...
7364,171230004601352
7365,171230005101058
7366,171230005101079
7367,171230005801087


In [113]:
sobject_ids=df4["sobject_id"].to_numpy().tolist()

In [None]:
#/home/praveen/Documents/all_data

In [114]:
GRID_SIZE = 0.06
LOWER_LAMBDA = 6472.5
UPPER_LAMBDA = 6740

In [115]:
regrid = np.arange(LOWER_LAMBDA, UPPER_LAMBDA, GRID_SIZE) 

In [116]:
def read_spectra(sobject_id):
    fits_files = [[],[],[],[]]
    for each_ccd in [1,2,3,4]:
        fits_files[each_ccd-1] = glob.glob("/home/praveen/Documents/all_data/"+str(sobject_id)+str(each_ccd)+".fits") #this is reading fits files from file and not downloading directly 

    spectrum = dict()
    for each_ccd in [1,2,3,4]: #GALAH uses indexing from 1 - 4
        if fits_files[each_ccd-1]!=[]: #just using zero indexing here 
            fits = pyfits.open(fits_files[each_ccd-1][0]) 
            
            # Extension 0: Reduced spectrum
            # Extension 1: Relative error spectrum
            # Extension 4: Normalised spectrum, NB: cut for CCD4

            # Extract wavelength grid for the reduced spectrum
            start_wavelength = fits[0].header["CRVAL1"]
            dispersion       = fits[0].header["CDELT1"]
            nr_pixels        = fits[0].header["NAXIS1"]
            reference_pixel  = fits[0].header["CRPIX1"]

            if reference_pixel == 0:
                reference_pixel = 1
            spectrum['wave_red_'+str(each_ccd)] = ((np.arange(0,nr_pixels)--reference_pixel+1)*dispersion+start_wavelength) #this is the reduced spectrum 

            # Extract wavelength grid for the normalised spectrum
            start_wavelength = fits[4].header["CRVAL1"]
            dispersion       = fits[4].header["CDELT1"]
            nr_pixels        = fits[4].header["NAXIS1"]
            reference_pixel  = fits[4].header["CRPIX1"]

            if reference_pixel == 0:
                reference_pixel=1
            spectrum['wave_norm_'+str(each_ccd)] = ((np.arange(0,nr_pixels)--reference_pixel+1)*dispersion+start_wavelength) #this is the normalised spectrum 

            spectrum['sob_red_'+str(each_ccd)]  = np.array(fits[0].data)
            spectrum['uob_red_'+str(each_ccd)]  = np.array(fits[0].data * fits[1].data)

            spectrum['sob_norm_'+str(each_ccd)] = np.array(fits[4].data)
            if each_ccd != 4:
                spectrum['uob_norm_'+str(each_ccd)] = np.array(fits[4].data * fits[1].data)
            else:
                # for normalised error of CCD4, only used appropriate parts of error spectrum
                spectrum['uob_norm_4'] = np.array(fits[4].data * (fits[1].data)[-len(spectrum['sob_norm_4']):])

            fits.close()
        else:
            spectrum['wave_red_'+str(each_ccd)] = []
            spectrum['wave_norm_'+str(each_ccd)] = []
            spectrum['sob_red_'+str(each_ccd)] = []
            spectrum['sob_norm_'+str(each_ccd)] = []
            spectrum['uob_red_'+str(each_ccd)] = []
            spectrum['uob_norm_'+str(each_ccd)] = []
    
    spectrum['wave_red'] = np.concatenate(([spectrum['wave_red_'+str(each_ccd)] for each_ccd in [1,2,3,4]]))
    spectrum['wave_norm'] = np.concatenate(([spectrum['wave_norm_'+str(each_ccd)] for each_ccd in [1,2,3,4]]))
    spectrum['sob_red'] = np.concatenate(([spectrum['sob_red_'+str(each_ccd)] for each_ccd in [1,2,3,4]]))
    spectrum['sob_norm'] = np.concatenate(([spectrum['sob_norm_'+str(each_ccd)] for each_ccd in [1,2,3,4]]))
    spectrum['uob_red'] = np.concatenate(([spectrum['uob_red_'+str(each_ccd)] for each_ccd in [1,2,3,4]]))
    spectrum['uob_norm'] = np.concatenate(([spectrum['uob_norm_'+str(each_ccd)] for each_ccd in [1,2,3,4]]))

    return spectrum 
    
    #returns a 30 "row" dict of numpy array per row 'wave_red_x' is the key for the key value pair
    #camera 3 will be the more useful canmera for Li spectra 

In [105]:
spectra = {}
spectra['read_spec'] = []

In [78]:
sobject_ids = [131123003001096, 140311006601089, 140413005401145]

In [120]:
for i in range(len(sobject_ids)):
    spectrum = read_spectra(sobject_ids[i]) #spectrum['sob_norm_'+str(camera)]
    spectra['read_spec'].append(spectrum['sob_norm_3'])


IndexError: list index out of range

In [80]:
spectra['read_spec']

[array([0.97637316, 0.98607529, 1.00008301, ..., 0.95826437, 0.94472458,
        0.90487861]),
 array([1.03711819, 1.04724289, 1.06785678, ..., 0.98376626, 0.9222493 ,
        0.9529037 ]),
 array([0.99184857, 0.96991195, 0.95798169, ..., 1.07005153, 1.04529462,
        1.04319349]),
 array([0.96005216, 0.9864934 , 0.98102335, ..., 0.98551172, 1.03933471,
        1.04218208]),
 array([1.07122999, 1.07448977, 1.0465883 , ..., 1.07532107, 1.04722289,
        1.02215217]),
 array([0.9431149 , 0.97428987, 0.98914246, ..., 1.01557591, 1.03602412,
        1.01802273]),
 array([0.96952635, 0.96903509, 0.95862503, ..., 0.99055677, 0.9916224 ,
        0.99970385]),
 array([0.97844052, 0.99291587, 0.99945905, ..., 0.98802798, 0.98932043,
        0.96722354]),
 array([1.06468714, 1.06306572, 1.03917726, ..., 1.08526916, 1.14515582,
        1.00577076]),
 array([1.00864268, 1.00736526, 1.01947545, ..., 1.00188861, 1.00698263,
        0.99513662]),
 array([0.97789376, 0.98148274, 0.98596956, ..., 0

In [117]:
def resample_spectra(spectrum, camera, verbose):

    spec_resample, spec_errs_resample = spectres(regrid,  spectrum['wave_norm_'+str(camera)], spectrum['sob_norm_'+str(camera)], spec_errs= spectrum['uob_norm_'+str(camera)],verbose=verbose) 

    return spec_resample, spec_errs_resample

#for Li pick camera 3

In [118]:
#open an empty dict to store the results 
resampled_spectra_collection = {}
resampled_spectra_collection['spec_resample'] = []

resampled_error_collection = {}
resampled_error_collection['error_resample'] = []

In [119]:
for sobject_id in sobject_ids:
    #this is the normalised resampled spectra 
    temp_spectrum = resample_spectra(read_spectra(sobject_id), 3, False)
    temp_spectrum[0][np.isnan(temp_spectrum[0])] = 1 #padding 
    resampled_spectra_collection['spec_resample'].append(temp_spectrum[0])

    #these are the error spectra
    #calculate mean error for padding 

    non_na_values = temp_spectrum[1][~np.isnan(temp_spectrum[1])]
    mean_error = np.mean(non_na_values)

    temp_spectrum[1][np.isnan(temp_spectrum[1])] = mean_error
    resampled_error_collection['error_resample'].append(temp_spectrum[1])

AttributeError: 'list' object has no attribute 'shape'