In [1]:
import os
import sys

import numpy as np
import pandas as pd
import torch

import AstroChemNet.utils as utils

# Resolve the project root two directory levels above the current working
# directory, expose it on the import path, and make it the working directory so
# the relative data paths used by later cells resolve against it.
# NOTE: the root is derived from the *current* working directory, so this cell
# is meant to be executed once per kernel session; running it again after the
# chdir would climb two more levels.
project_root = os.path.abspath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, project_root)
os.chdir(project_root)

# Project-local import: only resolvable once project_root is on sys.path.
from configs.general import GeneralConfig

# Seed the torch and NumPy global random number generators.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

In [2]:
# Load the stored initial abundances and label each column with the species
# names from the project configuration.
initial_abundances = np.load("utils/initial_abundances.npy")

# Append the non-species columns as integer-zero placeholders, in the same
# order as before: Radfield, Time, Av, gasTemp, Density.
df_inits = pd.DataFrame(
    initial_abundances, columns=GeneralConfig.species
).assign(Radfield=0, Time=0, Av=0, gasTemp=0, Density=0)

df_inits

Unnamed: 0,#C,#C2,#C2H,#C2H2,#C2H2O,#C2H3,#C2H3N,#C2H4,#C2H4N,#C2H4O,...,SIS+,SO,SO+,SO2,SO2+,Radfield,Time,Av,gasTemp,Density
0,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,...,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,9.999999999999999e-31,0,0,0,0,0


In [3]:
# --- Load the raw simulation table and normalise its schema -----------------
df = pd.read_hdf("data/gravitational_collapse.h5", key="df", start=0)
df = df.drop(columns=["dustTemp", "dstep", "zeta", "SURFACE", "BULK"])
df.columns = utils.rename_columns(df.columns)

# Every column that is neither metadata nor a physical parameter is treated as
# a species column. The exclusion list is built once instead of once per column.
_non_species = GeneralConfig.metadata + GeneralConfig.phys
species = sorted(col for col in df.columns if col not in _non_species)

# Order rows by model and time (reassignment instead of inplace=True).
df = df.sort_values(by=["Model", "Time"])

# Floor the radiation field at 1e-4.
df["Radfield"] = np.maximum(df["Radfield"], 1e-4)

params = ["Density", "Radfield", "Av", "gasTemp"]

# --- Prepend the initial state to every model and shift the parameters ------
output_chunks = []
for tracer, tdf in df.groupby("Model", sort=False):
    tdf = tdf.reset_index(drop=True)

    # Tag a *copy* of the initial-state row with this model's id (read from the
    # group's first row). Using .assign leaves the shared `df_inits` frame from
    # the earlier cell unmodified, so it no longer ends up carrying the id of
    # whichever model happened to be processed last.
    init_row = df_inits.assign(Model=tdf.iloc[0]["Model"])
    tdf = pd.concat([init_row, tdf], ignore_index=True)

    # Move the physical parameters up by one row: each row now holds the
    # parameters of the row that follows it (so the prepended initial row gets
    # the first recorded parameters). The shift leaves the last row empty; it
    # is filled from the row above, which after the shift holds the final
    # recorded parameters.
    physical = tdf[params].shift(-1)
    physical.iloc[-1] = physical.iloc[-2]
    tdf[params] = physical

    output_chunks.append(tdf)

# --- Reassemble, index, and fix the column order -----------------------------
df = pd.concat(output_chunks, ignore_index=True)
df = df.sort_values(by=["Model", "Time"]).reset_index(drop=True)
df.insert(0, "Index", range(len(df)))

df = df[["Index", "Model", "Time"] + params + species]

df

Unnamed: 0,Index,Model,Time,Density,Radfield,Av,gasTemp,#C,#C2,#C2H,...,SIH5+,SIO,SIO+,SIOH+,SIS,SIS+,SO,SO+,SO2,SO2+
0,0,0.0,0.0,2.467492e+06,0.000100,123.046825,13.316765,1.000000e-30,1.000000e-30,1.000000e-30,...,1.000000e-30,1.000000e-30,1.000000e-30,1.000000e-30,1.000000e-30,1.000000e-30,1.000000e-30,1.000000e-30,1.000000e-30,1.000000e-30
1,1,0.0,92.9,2.467549e+06,0.000922,124.772288,37.272757,2.810437e-13,6.062435e-11,1.934587e-15,...,3.889544e-17,3.842952e-07,1.558732e-13,3.251612e-10,4.492084e-11,3.851252e-17,1.556905e-13,1.529874e-14,1.126435e-17,4.956696e-26
2,2,0.0,185.8,2.467720e+06,0.002424,124.474635,37.273253,1.328167e-13,2.803096e-10,2.958563e-13,...,2.442836e-13,5.779119e-07,1.470241e-14,1.819444e-09,1.441175e-10,5.263925e-18,1.120431e-12,3.377016e-12,1.721883e-17,4.192388e-26
3,3,0.0,278.7,2.468005e+06,0.004113,118.360946,37.274079,1.778127e-13,3.407568e-10,4.664886e-13,...,2.951933e-13,5.894948e-07,8.410137e-15,2.891770e-09,1.889457e-10,6.987246e-18,1.889163e-12,3.774106e-12,2.898309e-17,6.772802e-26
4,4,0.0,371.6,2.468401e+06,0.006342,123.680609,37.275234,2.382298e-13,4.143126e-10,7.075867e-13,...,2.955418e-13,5.804798e-07,5.393911e-15,3.501012e-09,2.112234e-10,9.355900e-18,2.328728e-12,3.795913e-12,3.571743e-17,8.834764e-26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2963758,2963758,9978.0,27126.8,7.763906e+04,0.004172,4.395878,28.220272,1.993537e-14,9.905272e-10,1.823899e-11,...,7.428627e-14,3.570409e-08,3.374260e-16,2.323992e-10,2.944902e-12,1.366942e-14,6.448945e-12,1.881876e-11,2.166335e-14,2.056398e-20
2963759,2963759,9978.0,27219.7,7.710364e+04,0.004286,2.907921,28.196477,1.911747e-14,9.899456e-10,1.826711e-11,...,7.393906e-14,3.563371e-08,3.354582e-16,2.311452e-10,2.911839e-12,1.357824e-14,6.466022e-12,1.876441e-11,2.158776e-14,2.064687e-20
2963760,2963760,9978.0,27312.6,7.657411e+04,0.009786,1.173676,28.172518,1.839867e-14,9.893626e-10,1.829151e-11,...,7.357795e-14,3.556663e-08,3.423802e-16,2.304969e-10,2.879267e-12,1.348634e-14,6.482933e-12,1.865966e-11,2.151327e-14,2.073020e-20
2963761,2963761,9978.0,27405.5,7.605042e+04,0.005952,5.029348,28.148750,2.276693e-14,9.887758e-10,1.832739e-11,...,7.232625e-14,3.574368e-08,7.269153e-16,2.357076e-10,2.853023e-12,1.333271e-14,6.486427e-12,2.005257e-11,2.141177e-14,2.135194e-20


In [None]:
# Export only the model id, time, and physical-parameter columns.
# The subset is bound to its own name rather than back to `df`: the split cell
# that follows reads `df` and needs the species columns, which would be lost
# if `df` were overwritten here.
physical_df = df[["Model", "Time"] + params]

physical_df.to_csv("physical_parameters.csv", index=False)

In [4]:
# Split by model id rather than by row, so that every row of a given model
# ends up in exactly one of the two subsets.
tracers = df["Model"].unique()
np.random.shuffle(tracers)  # in-place shuffle driven by NumPy's global RNG

# The first 70% of the shuffled ids form the training set, the rest validation.
split_idx = int(0.70 * len(tracers))
train_tracers, val_tracers = tracers[:split_idx], tracers[split_idx:]

# Select rows by membership and renumber each subset from zero.
train_df = df[df["Model"].isin(train_tracers)].reset_index(drop=True)
val_df = df[df["Model"].isin(val_tracers)].reset_index(drop=True)

# mode="w" starts the HDF5 file afresh; mode="a" then adds the second key to it.
train_df.to_hdf("data/grav_collapse_clean.h5", key="train", mode="w")
val_df.to_hdf("data/grav_collapse_clean.h5", key="val", mode="a")

In [None]:
# Build the initial-abundance array and the species list from the raw
# initial-conditions table.
# NOTE(review): this cell relies on `species`, which is defined in the
# preprocessing cell, and it writes "initial_abundances.npy" into the working
# directory, whereas the loading cell reads "utils/initial_abundances.npy".
# Confirm which location is intended.
df_init = pd.read_csv("initial_abundances.dat")

# Drop the time and physical-condition columns, keeping abundance columns only.
df_init = df_init.drop(
    columns=[
        "Time",
        "Density",
        "gasTemp",
        "dustTemp",
        "av",
        "Radfield",
        "zeta",
        "point",
    ]
)

# Apply the same column renaming as the main dataset, then drop the two
# aggregate columns that the main dataset also drops.
df_init.columns = utils.rename_columns(df_init.columns)
df_init = df_init.drop(columns=["BULK", "SURFACE"])

# Put the columns in the same (sorted) order as `species`.
df_init = df_init[species]

# Persist the abundance values and the matching species names.
# (A stray `df_init.to_numpy()` statement whose result was discarded has been
# removed; the array is now materialised only once, for the save.)
np.save("initial_abundances.npy", df_init.to_numpy())

np.savetxt("species.txt", df_init.columns, fmt="%s", delimiter=" ")