# Imports

In [None]:
import pandas as pd

# Functions

In [None]:
# ===== String to Number =====
def to_number_or_keep(x):
    """Convert a string to either an integer, a float, or keep it as a string if conversion is not possible.

    Args:
        x (str): a string of interest.

    Returns:
        int, float, or str: the converted number if possible, otherwise the original string.
    """
    if isinstance(x, str):
        try:
            return int(x)       # 1. Try to turn the string into an integer
        except ValueError:
            try:
                return float(x) # 2. If integer fails, try to turn it into a float
            except ValueError:
                return x        # 3. If both fail, keep it as the original string
    return x  # 4. If x wasn’t a string to begin with, just return it unchanged

# Create table from .txt

In [64]:
# Read the entire file
txt_path = "C:\\Users\\User\\Desktop\\Projects\\Dr. Surapong - Galaxy Gas Outflows 2024\\Codes\\Raw Data\\VIMOS VLT Ultra deep\\VVDS-F0226-04\\population_profile.txt"

# Use pandas read_csv for efficient reading
table = pd.read_csv(
    txt_path,
    sep=r'\s+',           # Split on whitespace
    skiprows=3,           # Skip first 3 lines (comments)
    # comment='#'           # Handle any additional comment lines
)

# Apply type conversion to all columns
table = table.apply(lambda col: col.map(to_number_or_keep))

table.head()

Unnamed: 0,#,NUM,ID-IAU,ALPHA,DELTA,MAGI,Z,ZFLAGS,MAG_U_CFH12K,MAGERR_AUTO_U,...,MGB,D_MGB,D4,D_D4,D4N,D_D4N,HALPHA_ABS,HBETA_ABS,HGAMMA_ABS,HDELTA_ABS
0,910152436,VVDS-J022724.90-043809.9,36.853777,-4.636105,24.5669,2.1,1,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,
1,910152646,VVDS-J022723.69-043807.2,36.848737,-4.635337,24.698,2.3663,2,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,
2,910153130,VVDS-J022711.17-043806.2,36.796542,-4.635074,24.3329,1.5105,3,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,
3,910153225,VVDS-J022635.17-043804.6,36.646543,-4.634627,23.9874,1.2802,9,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.474,0.209,0.0,0.0,0.0,5.84,
4,910153609,VVDS-J022636.01-043800.1,36.650068,-4.633384,24.6551,9.9999,0,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,


Make sure to not run the cell below more than once. If you are unsure, **RUN THE ENTIRE SECTION AGAIN**.

In [65]:
column_names = table.columns.tolist()
substitute_column_names = column_names.copy()
substitute_column_names.append("REMOVE_ME")

# Reassign column names with shifted names
table = table.rename(columns = {column_names[i]: substitute_column_names[i + 1] for i in range(len(column_names))})
table.drop("REMOVE_ME", axis=1)

table.head()

Unnamed: 0,NUM,ID-IAU,ALPHA,DELTA,MAGI,Z,ZFLAGS,MAG_U_CFH12K,MAGERR_AUTO_U,MAG_B_CFH12K,...,D_MGB,D4,D_D4,D4N,D_D4N,HALPHA_ABS,HBETA_ABS,HGAMMA_ABS,HDELTA_ABS,REMOVE_ME
0,910152436,VVDS-J022724.90-043809.9,36.853777,-4.636105,24.5669,2.1,1,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,
1,910152646,VVDS-J022723.69-043807.2,36.848737,-4.635337,24.698,2.3663,2,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,
2,910153130,VVDS-J022711.17-043806.2,36.796542,-4.635074,24.3329,1.5105,3,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,
3,910153225,VVDS-J022635.17-043804.6,36.646543,-4.634627,23.9874,1.2802,9,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.474,0.209,0.0,0.0,0.0,5.84,
4,910153609,VVDS-J022636.01-043800.1,36.650068,-4.633384,24.6551,9.9999,0,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,


# Create HDF5 and setting permission

We name the "directory" inside the constructed `HDF5` file as `poppro`, which stands for `Population Profile`. This table stored under this specific key **MUST NOT BE CHANGED OR MANIPULATED IN ANYWAY**.

In [None]:
file_path = "UltraDeep_Population_Profile.h5"

with pd.HDFStore(file_path, mode = "w") as store:
    store.put("PopulationProfile", table, format = "table")
    print("""===== Storing Table to HDF5 =====
>>> Initial table of population profile for the Ultra-Deep (UD) observation field has been created.
>>> The table is saved to 'UltraDeep_Population_Profile.h5' with the key 'PopulationProfile'.""")

Initial table of population profile for the Ultra-Deep (UD) observation field has been created.
>>> The table is saved to 'UltraDeep_Population_Profile.h5' with the key 'PopulationProfile'.


# Testing

In [70]:
with pd.HDFStore("UltraDeep_Population_Profile.h5", mode='r') as store:
    loaded_table = store.get("PopulationProfile")  # Load the DataFrame from the HDF5 file
    print("""===== Loaded Table from HDF5 =====
>>> The table has been successfully loaded from 'UltraDeep_Population_Profile.h5' with the key 'PopulationProfile'.""")

loaded_table.head()

===== Loaded Table from HDF5 =====
>>> The table has been successfully loaded from 'UltraDeep_Population_Profile.h5' with the key 'PopulationProfile'.


Unnamed: 0,NUM,ID-IAU,ALPHA,DELTA,MAGI,Z,ZFLAGS,MAG_U_CFH12K,MAGERR_AUTO_U,MAG_B_CFH12K,...,D_MGB,D4,D_D4,D4N,D_D4N,HALPHA_ABS,HBETA_ABS,HGAMMA_ABS,HDELTA_ABS,REMOVE_ME
0,910152436,VVDS-J022724.90-043809.9,36.853777,-4.636105,24.5669,2.1,1,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,
1,910152646,VVDS-J022723.69-043807.2,36.848737,-4.635337,24.698,2.3663,2,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,
2,910153130,VVDS-J022711.17-043806.2,36.796542,-4.635074,24.3329,1.5105,3,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,
3,910153225,VVDS-J022635.17-043804.6,36.646543,-4.634627,23.9874,1.2802,9,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.474,0.209,0.0,0.0,0.0,5.84,
4,910153609,VVDS-J022636.01-043800.1,36.650068,-4.633384,24.6551,9.9999,0,-99.9,-99.9,-99.9,...,-9.999,0.0,-9.999,0.0,-9.999,0.0,0.0,0.0,0.0,


# Notes on adding a new table to HDF5 file

As we have mentioned before, we will not be manipulating or do anything with the `PopulationProfile` table in the `UltraDeep_Population_Profile.h5`. Later on, we may need to construct a new table and for convenience just store it in the same `.h5` file but under different key. To do this, we **MUST USE** the mode `mode = 'a'` and not `mode = 'w'` to avoid overwriting the original contents of the `.h5` file. An example is shown below.

In [73]:
dummy_table = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
dummy_table.head()

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [75]:
with pd.HDFStore("UltraDeep_Population_Profile.h5", mode='a') as store:
    store.put("DummyTable", dummy_table, format = "table")
    print(f"""===== Storing Dummy Table to HDF5 =====
>>> Dummy table has been successfully stored in 'UltraDeep_Population_Profile.h5' with the key 'DummyTable'.
>>> Now, the table have the following keys: {store.keys()}""")

===== Storing Dummy Table to HDF5 =====
>>> Dummy table has been successfully stored in 'UltraDeep_Population_Profile.h5' with the key 'DummyTable'.
>>> Now, the table have the following keys: ['/DummyTable', '/PopulationProfile']
