In [40]:
# all imports
import numpy as np
import h5py
import os
import requests
from astropy.table import Table, vstack
import matplotlib.pyplot as plt
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.wcs import WCS
import math
import matplotlib as mpl
from astropy.visualization.wcsaxes import SphericalCircle
import pandas as pd

In [41]:
# define directories
data_dir = '/global/cfs/projectdirs/desi/science/td/pv/tfgalaxies/'
cache_dir = '/pscratch/sd/j/jjpim/cache/'
v_dir = '/pscratch/sd/j/jjpim/massmag/'

# load updated master table
tf_mastertable = Table.read(data_dir + 'tf_mastertable_massmag_stellar.fits') 

#load SGA table
SGA = Table.read('/global/cfs/cdirs/cosmo/data/sga/2020/SGA-2020.fits', 'ELLIPSE')

In [42]:
# Define the directory containing the data
prova_data_dir = '/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/'

# List all files in the directory
files = [os.path.join(prova_data_dir, file) for file in os.listdir(prova_data_dir)]

# Print out the full paths of the files
for file_path in files:
    print(file_path)
    
# Confirm the number of files
print(f"Total number of files: {len(files)}")  

/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.82.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.83.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.68.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.73.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.53.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.0.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.98.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.85.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.41.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.93.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.67.hdf5
/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.45.hdf5
/global/cfs/cdirs/desi/users/

In [34]:
t1 = Table.read(prova_data_dir + 'BGS_ANY_full.vagc.83.hdf5') 

In [35]:
from astropy.table import Table, join
filtered_table = join(tf_mastertable, t1, keys='TARGETID', join_type='inner')
print(filtered_table)

     TARGETID        TARGET_RA     ...   SEDFLOW_LOGMSTAR_MAP 
----------------- ---------------- ... -----------------------
39633063478560197 224.248403344869 ...       9.732120175797151
39633063486948665 224.868891649595 ...       8.685071568885311
39633063486950541 224.993757394256 ...       10.37968472005075
39633063491142091 225.111852744681 ...       9.731283179502089
39633063503726776 226.261734835633 ...       9.403603989467934
39633063516309960 227.222864263853 ...      10.578668635285547
39633063533086932 228.493781640775 ...       9.408501656916014
39633063533087632 228.555723662478 ...      10.613609802813286
39633063558252101 230.387408025796 ...        10.7407588937679
39633063566641794 231.108156628435 ...      10.359174299492812
              ...              ... ...                     ...
39633081702812253 193.950340847289 ...  1.5475367708320214e-07
39633081707005899 194.216337157937 ...       7.030793517420685
39633081715394791 194.891944681812 ...       10.0987951

In [36]:
'''def process_file(file_path, tf_mastertable):
    try:
        provabgs = Table.read(file_path, format='hdf5')
        
        #check for needed column
        if 'SEDFLOW_LOGMSTAR_MAP' not in provabgs.colnames:
            print(f"Warning: Column 'SEDFLOW_LOGMSTAR_MAP' not found in {file_path}. Skipping this file.")
            return tf_mastertable, True  # Return True to indicate this file was skipped

        #get target ids
        target_ids = tf_mastertable['TARGETID'][tf_mastertable['M_R26'] > 0]

        #find matching TARGETIDs in the provabgs table
        valid_targets_mask = np.isin(provabgs['TARGETID'], target_ids)
        matching_targets = provabgs[valid_targets_mask]

        if len(matching_targets) == 0:
            print(f"No matching targets found in {file_path}. Skipping.")
            return tf_mastertable, True  # Return True to indicate this file was skipped

        #update tf_mastertable
        for row in matching_targets:
            target_id = row['TARGETID']
            stellar_mass = row['SEDFLOW_LOGMSTAR_MAP']
            log_stellar_mass_samples = row['SEDFLOW_LOGMSTAR_SAMPLES']
            stellar_mass_samples_linear = 10 ** log_stellar_mass_samples

            std_linear = np.std(stellar_mass_samples_linear)
            mean_linear = np.mean(stellar_mass_samples_linear)
            std_log = std_linear / mean_linear

            #find the corresponding sga id for the target
            sga_id = tf_mastertable[tf_mastertable['TARGETID'] == target_id]['SGA_ID'][0]

            #update the M_STELLAR_PROVABGS and M_STELLAR_PROVABGS_ERR columns
            mask = np.logical_and(tf_mastertable['SGA_ID'] == sga_id, tf_mastertable['M_R26'] > 0)
            tf_mastertable['M_STELLAR_PROVABGS'][mask] = stellar_mass
            tf_mastertable['M_STELLAR_PROVABGS_ERR'][mask] = std_log

        return tf_mastertable, False  #not skipped

    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return tf_mastertable, True  #skipped

In [46]:
def update_stellar_masses(tf_mastertable, provabgs):
    # initialize list
    targets = []
    
    # Extract target IDs from tf_mastertable where M_R26 > 0
    target_ids = tf_mastertable['TARGETID'][tf_mastertable['M_R26'] > 0]
    
    # Find the matching targets in provabgs
    for target_id in target_ids:
        if len(provabgs[provabgs['TARGETID'] == target_id]) > 0:
            targets.append(target_id)
    
    # Loop through each matching target ID to extract and update masses
    for target_id in targets:
        # Extract stellar mass and log mass samples from provabgs
        stellar_mass = provabgs[provabgs['TARGETID'] == target_id]['SEDFLOW_LOGMSTAR_MAP'][0]
        log_stellar_mass_samples = provabgs[provabgs['TARGETID'] == target_id]['SEDFLOW_LOGMSTAR_SAMPLES'].data[0]
        
        # Convert to linear scale
        stellar_mass_samples_linear = 10 ** log_stellar_mass_samples
        
        # Calculate the standard deviation and mean in linear space
        std_linear = np.std(stellar_mass_samples_linear)
        mean_linear = np.mean(stellar_mass_samples_linear)
        
        # Calculate standard deviation in log space
        std_log = std_linear / mean_linear
        
        # Find corresponding SGA_ID
        sga_id = tf_mastertable[tf_mastertable['TARGETID'] == target_id]['SGA_ID'][0]
        
        # Update the tf_mastertable with stellar mass and error values
        tf_mastertable['M_STELLAR_PROVABGS'][np.logical_and(tf_mastertable['SGA_ID'] == sga_id, tf_mastertable['M_R26'] > 0)] = stellar_mass
        tf_mastertable['M_STELLAR_PROVABGS_ERR'][np.logical_and(tf_mastertable['SGA_ID'] == sga_id, tf_mastertable['M_R26'] > 0)] = std_log

    return tf_mastertable

In [50]:
import os
import h5py
from astropy.table import Table

# Initialize the list of files
files = [os.path.join(prova_data_dir, file) for file in os.listdir(prova_data_dir)]

# Load the master table once (assuming it's the same for all files)
tf_mastertable = Table.read(data_dir + 'tf_mastertable_massmag_stellar.fits')  # Adjust the path as necessary

# Iterate through each file in the directory
for file in files:
    try:
        # Open the HDF5 file containing the Astropy table
        with h5py.File(file, 'r') as f:
            # List all available datasets in the file
            keys = list(f.keys())

            # Loop through the datasets to find the one that matches the structure of the Astropy table
            for key in keys:
                data = f[key][:]
                # Check if the data matches the structure of an Astropy table (e.g., 2D array, columns)
                if isinstance(data, (list, np.ndarray)):  # Adjust this check if needed
                    # Assuming that this is the right table structure, convert it into an Astropy table
                    provabgs = Table(data)  # Convert to Astropy Table
                    break
            else:
                raise KeyError(f"No matching dataset found in {file}")

        # Update tf_mastertable with stellar mass information
        tf_mastertable = update_stellar_masses(tf_mastertable, provabgs)

    except Exception as e:
        print(f"Error processing {file}: {e}")

# Optionally, save the updated tf_mastertable after processing all files
#tf_mastertable.write('updated_tf_mastertable.fits', format='fits', overwrite=True)


Error processing /global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.fluxes.hdf5: 'SEDFLOW_LOGMSTAR_MAP'


In [None]:
def update_stellar_masses(tf_mastertable, provabgs):
    # Initialize columns if they don't exist
    if 'M_STELLAR_PROVABGS' not in tf_mastertable.colnames:
        tf_mastertable['M_STELLAR_PROVABGS'] = np.zeros(len(tf_mastertable))
    if 'M_STELLAR_PROVABGS_ERR' not in tf_mastertable.colnames:
        tf_mastertable['M_STELLAR_PROVABGS_ERR'] = np.zeros(len(tf_mastertable))
    
    # Initialize list
    targets = []
    
    # Extract target IDs from tf_mastertable where M_R26 > 0
    target_ids = tf_mastertable['TARGETID'][tf_mastertable['M_R26'] > 0]
    
    # Find the matching targets in provabgs
    for target_id in target_ids:
        if len(provabgs[provabgs['TARGETID'] == target_id]) > 0:
            targets.append(target_id)
    
    # Loop through each matching target ID to extract and update masses
    for target_id in targets:
        # Extract matching rows for the target_id in provabgs
        matching_rows = provabgs[provabgs['TARGETID'] == target_id]
        
        if len(matching_rows) > 0:
            # Extract stellar mass and log mass samples from provabgs
            stellar_mass = matching_rows['SEDFLOW_LOGMSTAR_MAP'][0]
            log_stellar_mass_samples = matching_rows['SEDFLOW_LOGMSTAR_SAMPLES'].data[0]
            
            # Convert to linear scale
            stellar_mass_samples_linear = 10 ** log_stellar_mass_samples
            
            # Calculate the standard deviation and mean in linear space
            std_linear = np.std(stellar_mass_samples_linear)
            mean_linear = np.mean(stellar_mass_samples_linear)
            
            # Calculate standard deviation in log space
            std_log = std_linear / mean_linear
            
            # Find corresponding SGA_ID
            sga_id = tf_mastertable[tf_mastertable['TARGETID'] == target_id]['SGA_ID'][0]
            
            # Update the tf_mastertable with stellar mass and error values
            mask = np.logical_and(tf_mastertable['SGA_ID'] == sga_id, tf_mastertable['M_R26'] > 0)
            tf_mastertable['M_STELLAR_PROVABGS'][mask] = stellar_mass
            tf_mastertable['M_STELLAR_PROVABGS_ERR'][mask] = std_log
            
            # Debugging print statements
            print(f"Updated target_id {target_id} (SGA_ID: {sga_id}):")
            print(f"  Stellar mass: {stellar_mass}")
            print(f"  Log mass samples: {log_stellar_mass_samples}")
            print(f"  Std in log space: {std_log}")
        else:
            print(f"No matching rows found for target_id {target_id} in provabgs.")

    # Return the updated tf_mastertable
    return tf_mastertable

# Initialize the list of files
files = [os.path.join(prova_data_dir, file) for file in os.listdir(prova_data_dir)]

# Load the master table once (assuming it's the same for all files)
tf_mastertable = Table.read(data_dir + 'tf_mastertable_massmag_stellar.fits')  # Adjust the path as necessary

# Iterate through each file in the directory
for file in files:
    try:
        # Open the HDF5 file containing the Astropy table
        with h5py.File(file, 'r') as f:
            # List all available datasets in the file
            keys = list(f.keys())

            # Loop through the datasets to find the one that matches the structure of the Astropy table
            for key in keys:
                data = f[key][:]
                # Check if the data matches the structure of an Astropy table (e.g., 2D array, columns)
                if isinstance(data, (list, np.ndarray)):  # Adjust this check if needed
                    # Assuming that this is the right table structure, convert it into an Astropy table
                    provabgs = Table(data)  # Convert to Astropy Table
                    break
            else:
                raise KeyError(f"No matching dataset found in {file}")

        # Update tf_mastertable with stellar mass information
        tf_mastertable = update_stellar_masses(tf_mastertable, provabgs)

    except Exception as e:
        print(f"Error processing {file}: {e}")

# Optionally, save the updated tf_mastertable after processing all files
tf_mastertable.write('tf_mastertable_massmag_stellar.fits', format='fits', overwrite=True)


Updated target_id 39633267682444633 (SGA_ID: 114405):
  Stellar mass: 0.0
  Log mass samples: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]
  Std in log space: 0.0
Updated target_id 39633267682444633 (SGA_ID: 114405):
  Stellar mass: 0.0
  Log mass samples: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]
  Std in log space: 0.0
Updated target_id 39627896901337314 (SGA_ID: 272562):
  Stellar mass: 10.105200301066112
  Log mass samples: [10.13700871 10.10306042 10.42514093 10.217

In [62]:
# Check common TARGETIDs and their types
provabgs_targetids = provabgs['TARGETID'].data.astype(str)
tf_mastertable_targetids = tf_mastertable['TARGETID'].data.astype(str)
common_targetids = np.intersect1d(provabgs_targetids, tf_mastertable_targetids)

print("Common TARGETIDs:", common_targetids)
print("Number of common TARGETIDs:", len(common_targetids))

# Check if any rows are selected in tf_mastertable
common_rows = tf_mastertable[np.isin(tf_mastertable['TARGETID'], common_targetids)]
print("Filtered rows in tf_mastertable with common TARGETIDs:")
print(common_rows)

# Check the columns you're updating
print("M_STELLAR_PROVABGS values:", tf_mastertable['M_STELLAR_PROVABGS'])
print("M_STELLAR_PROVABGS_ERR values:", tf_mastertable['M_STELLAR_PROVABGS_ERR'])


Common TARGETIDs: ['39633220072899419' '39633220077094831' '39633220098064420'
 '39633220106455753' '39633220110650641' '39633220114843226'
 '39633220114843482' '39633220148396181' '39633220156787159'
 '39633220156787995' '39633220198728176' '39633220223894583'
 '39633220278419603' '39633220311974884' '39633220311976131'
 '39633220311976229' '39633220311977981' '39633220320362750'
 '39633220328754273' '39633220332947952' '39633220341336736'
 '39633220353919368' '39633220366499950' '39633220374889492'
 '39633220374891724' '39633220387473365' '39633220437804229'
 '39633220442000752' '39633220446193036' '39633220454582603'
 '39633220454583584' '39633220467164151' '39633220467164474'
 '39633220467166633' '39633220479748453' '39633220483941323'
 '39633220500719524' '39633220504912922' '39633220504913152'
 '39633220504914283' '39633220521689381' '39633220521690184'
 '39633220534275497' '39633220542663699' '39633220555244993'
 '39633220559438319' '39633220567826863' '39633220572021914'
 '3963

In [38]:
'''
# Initialize a counter for skipped files
skipped_count = 0

# Wrap the single file path in a list so the loop iterates correctly
files = ['/global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.83.hdf5']

#Process each file and count how many were skipped
for file_path in files:
    print(f"Processing {file_path}...")
    tf_mastertable, was_skipped = process_file(file_path, tf_mastertable)
    if was_skipped:
        skipped_count += 1

    # Optionally save the updated master table after each iteration
    tf_mastertable.write('intermediate_tf_mastertable.fits', overwrite=True)

# Save the final updated master table
tf_mastertable.write('final_tf_mastertable.fits', overwrite=True)

# Print the total number of skipped files
print(f"Processing complete. Final table saved as 'final_tf_mastertable.fits'.")
print(f"Total number of skipped files: {skipped_count}")

Processing /global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.83.hdf5...
No matching targets found in /global/cfs/cdirs/desi/users/chahah/sedflow/desiy1_bgs/BGS_ANY_full.vagc.83.hdf5. Skipping.
Processing complete. Final table saved as 'final_tf_mastertable.fits'.
Total number of skipped files: 1


In [60]:
# Assuming 'TARGETID' is the column name in both tables
provabgs_targetids = provabgs['TARGETID'].data  # Extract TARGETID from provabgs table
tf_mastertable_targetids = tf_mastertable['TARGETID'].data  # Extract TARGETID from tf_mastertable table

# Find common TARGETID values
common_targetids = np.intersect1d(provabgs_targetids, tf_mastertable_targetids)

# Optionally, print or return the common TARGETIDs
print("Common TARGETIDs:", common_targetids)

len(common_targetids)

Common TARGETIDs: [39633220072899419 39633220077094831 39633220098064420 39633220106455753
 39633220110650641 39633220114843226 39633220114843482 39633220148396181
 39633220156787159 39633220156787995 39633220198728176 39633220223894583
 39633220278419603 39633220311974884 39633220311976131 39633220311976229
 39633220311977981 39633220320362750 39633220328754273 39633220332947952
 39633220341336736 39633220353919368 39633220366499950 39633220374889492
 39633220374891724 39633220387473365 39633220437804229 39633220442000752
 39633220446193036 39633220454582603 39633220454583584 39633220467164151
 39633220467164474 39633220467166633 39633220479748453 39633220483941323
 39633220500719524 39633220504912922 39633220504913152 39633220504914283
 39633220521689381 39633220521690184 39633220534275497 39633220542663699
 39633220555244993 39633220559438319 39633220567826863 39633220572021914
 39633220572022560 39633220572023215 39633220580410460 39633220580411989
 39633220580412088 39633220580412

638

In [61]:
common_rows = tf_mastertable[np.isin(tf_mastertable['TARGETID'], common_targetids)]

# Print the filtered rows
print(common_rows)


     TARGETID        TARGET_RA     ... M_STELLAR_PROVABGS M_STELLAR_PROVABGS_ERR
----------------- ---------------- ... ------------------ ----------------------
39633220098064420 162.038766796335 ...                 --                     --
39633228151128920 163.824445690751 ...                 --                     --
39633224124597085 162.052614863675 ...                 --                     --
39633224120403338 161.689096022652 ...                 --                     --
39633232194438753 169.191772769351 ...                 --                     --
39633236212582005 173.702734361046 ...                 --                     --
39633236212582472 173.756742407124 ...                 --                     --
39633236208389728 173.484516428452 ...                 --                     --
39633224162345412 165.397190681015 ...                 --                     --
39633232194437617 169.052674193326 ...                 --                     --
              ...           

In [52]:
'''
# Filter the table for rows where SURVEY is 'main' and M_STELLAR_PROVABGS > 0
filtered_rows = tf_mastertable[(tf_mastertable['SURVEY'] == 'main') & (tf_mastertable['M_STELLAR_PROVABGS'] > 0)]

# Display the filtered rows
print(filtered_rows)

     TARGETID        TARGET_RA     ... M_STELLAR_PROVABGS M_STELLAR_PROVABGS_ERR
----------------- ---------------- ... ------------------ ----------------------
 2843327716130818 251.903623582753 ...  9.804860160202987     0.7855994137757417
39632986781517141 251.905642100238 ...  9.804860160202987     0.7855994137757417
 2843327716130819 251.907660632894 ...  9.804860160202987     0.7855994137757417
39632991709825174 251.454123283444 ... 10.177321827062507     0.4776717152120182
 2390333853794308 251.457391111016 ... 10.177321827062507     0.4776717152120182
39632996629744567  251.46462881225 ...  9.401642639405688     1.4047837072235547
39632986785712860 252.298010456348 ... 10.725856861685363     1.2457905935925946
39632991718214679 252.135481605483 ... 10.285887369362008     0.7565416289234379
39632981853209016 253.125176013999 ... 10.026644569358746     1.8056096681145142
39633001075704153 217.631990030448 ... 10.599400404798113     0.5096064620768144
              ...           

In [None]:
#writing new data into table
tf_mastertable.write(data_dir + 'tf_mastertable_massmag_stellar.fits', format='fits', overwrite=True)