In [4]:
import numpy as np
import pandas as pd
import xarray as xr
import os

In [5]:
# Case directory name -> number of scenario sub-directories to read from it
# (the aggregation loop iterates range(n) for each entry, in this order).
cases = dict(
    case_1=800,
    case_2=400,
    case_3=400,
    case_4=200,
    case_5=100,
    case_6=100,
)

# Root directory that contains one sub-directory per case.
base_root = '/data/keeling/a/xx24/e/proj_ml/scenario_libs'

In [None]:
# Build one flat record (dict) per (scenario, output step) and collect them in
# `all_data`. Both accumulators are reset here so re-running the cell does not
# append duplicates or keep counting IDs from a previous run.
all_data = []
global_id = 0

# Output column -> variable name in each scenario's 'urban_plume_process.nc'.
series_vars = {
    'Density':  'density',
    'BC':       'tot_bc_mass_conc',
    'OC':       'tot_oc_mass_conc',
    'OIN':      'tot_oin_mass_conc',
    'Na':       'tot_na_mass_conc',
    'Cl':       'tot_cl_mass_conc',
    'SO4':      'tot_so4_mass_conc',
    'NO3':      'tot_no3_mass_conc',
    'NH4':      'tot_nh4_mass_conc',
    'SOA':      'tot_soa_mass_conc',
    'Num_Conc': 'tot_num_conc',
}

# Order in which the series-derived columns are written into each row.
series_columns = (
    'Density', 'Bulk_DP', 'BC', 'OC', 'OIN', 'Na', 'Cl',
    'SO4', 'NO3', 'NH4', 'SOA', 'Num_Conc',
)

# Column-label suffix -> upper bound applied to the 'sc' coordinate.
# NOTE(review): labels look like the same thresholds expressed in percent
# (0.001 -> '0.1'); kept as explicit literals so the comparisons are exact.
sc_thresholds = {
    '0.1':  0.001,
    '0.15': 0.0015,
    '0.2':  0.002,
    '0.25': 0.0025,
    '0.3':  0.003,
    '0.4':  0.004,
    '0.6':  0.006,
    '0.8':  0.008,
    '1.0':  0.01,
}

# Column prefix -> distribution variable in the per-step '*_process.nc' file.
ccn_dists = {
    'CCN_CS': 'sc_dist',
    'CCN_VS': 'sc_varying_sigma_dist',
}

for case_name, n_scenarios in cases.items():
    case_path = os.path.join(base_root, case_name, 'scenarios')

    for local_idx in range(n_scenarios):
        scenario_path = os.path.join(case_path, f'scenario_{local_idx:04d}', 'out_init')

        # Read the scenario-level series once. `.values` copies the data into
        # memory, so the file can be closed as soon as the block exits; leaving
        # ~100k datasets open over the whole run would leak file handles.
        with xr.open_dataset(os.path.join(scenario_path, 'urban_plume_process.nc')) as aero:
            # [1:] drops the first entry so that index 0 lines up with the
            # first per-step file read below (j = 2), i.e. hours 1-24.
            series = {col: aero[var][1:].values for col, var in series_vars.items()}
            dpnum_arr = aero['tot_dp_num_conc'][1:].values
        series['Bulk_DP'] = dpnum_arr / series['Num_Conc']

        # Scenario identifier, 1-based and unique across all cases.
        global_id += 1

        for j in range(2, 26):
            idx = j - 2
            suffix = f'{j:08d}'
            pre_path = os.path.join(scenario_path, f'urban_plume_0001_{suffix}.nc')
            ccn_path = os.path.join(scenario_path, f'urban_plume_{suffix}_process.nc')

            # Both per-step files are closed before moving to the next step.
            with xr.open_dataset(pre_path) as pre, xr.open_dataset(ccn_path) as ccn:
                row = {
                    'Global_ID':   global_id,
                    'DayofYear':   int(pre['start_day_of_year'].values),
                    # int() truncates; assumes 'time' is a whole multiple of
                    # 3600 s -- TODO confirm against the model output.
                    'Time_hr':     int(pre['time'].values / 3600),
                    'Temperature': float(pre['temperature'].values),
                    'RH':          float(pre['relative_humidity'].values),
                    'Pressure':    float(pre['pressure'].values),
                }

                for col in series_columns:
                    row[col] = float(series[col][idx])

                # Log-width of the first 'sc' bin, used to scale the summed
                # distribution (assumes uniform log spacing -- TODO confirm).
                sc_edges = ccn['sc_edges']
                ratio = np.log(sc_edges[1] / sc_edges[0])
                sc = ccn['sc']

                # For each distribution, sum the bins with sc <= threshold.
                for prefix, dist_name in ccn_dists.items():
                    dist = ccn[dist_name]
                    for label, threshold in sc_thresholds.items():
                        row[f'{prefix}_{label}'] = float(
                            ratio * dist.sel(sc=sc <= threshold).sum()
                        )

            all_data.append(row)



In [None]:
# Assemble the row dicts collected in `all_data` into a single table:
# one row per dict, one column per dict key.
df = pd.DataFrame(all_data)
# Bare last expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,Global_ID,DayofYear,Time_hr,Temperature,RH,Density,Bulk_DP,BC,OC,OIN,...,CCN_CS_0.3,CCN_CS_0.6,CCN_CS_1.0,CCN_vS_0.1,CCN_vS_0.2,CCN_vS_0.3,CCN_vS_0.4,CCN_vS_0.6,CCN_vS_0.8,CCN_vS_1.0
0,1,116,1,283.512,0.5398,1.228736,1.521474e-07,8.212753e-11,1.484979e-08,2.277779e-10,...,1.111962e+07,7.971885e+07,1.084144e+08,4.393146e+06,3.997185e+07,9.075370e+07,1.420399e+08,1.709764e+08,1.788928e+08,1.798225e+08
1,1,116,2,283.512,0.5398,1.228736,1.592476e-07,1.195284e-10,2.245956e-08,3.408038e-10,...,2.273840e+08,2.635519e+08,2.653157e+08,1.580388e+08,2.430337e+08,2.605994e+08,2.657191e+08,2.669471e+08,2.673306e+08,2.673770e+08
2,1,116,3,283.512,0.5398,1.228736,1.781222e-07,1.346392e-10,2.631963e-08,4.003313e-10,...,2.948446e+08,3.069291e+08,3.077942e+08,2.213377e+08,2.990688e+08,3.054373e+08,3.079360e+08,3.086115e+08,3.086765e+08,3.086859e+08
3,1,116,4,283.512,0.5398,1.228736,1.952056e-07,1.550557e-10,2.997342e-08,4.639134e-10,...,3.498774e+08,3.609535e+08,3.616065e+08,2.870961e+08,3.545253e+08,3.603278e+08,3.613105e+08,3.615338e+08,3.617893e+08,3.617893e+08
4,1,116,5,283.512,0.5398,1.228736,2.097518e-07,1.856348e-10,3.633321e-08,5.587985e-10,...,4.315734e+08,4.410223e+08,4.418887e+08,3.788440e+08,4.347586e+08,4.400085e+08,4.415262e+08,4.420140e+08,4.420699e+08,4.420699e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,2000,350,20,243.334,0.5070,1.431618,7.136350e-08,3.568246e-10,1.829328e-08,0.000000e+00,...,5.659374e+09,9.661602e+09,1.039488e+10,4.875915e+08,2.636004e+09,4.444928e+09,6.674724e+09,8.776683e+09,1.046341e+10,1.110447e+10
47996,2000,350,21,243.334,0.5070,1.431618,7.272676e-08,3.385150e-10,1.733479e-08,0.000000e+00,...,5.339608e+09,9.058949e+09,9.754838e+09,4.801673e+08,2.528741e+09,4.272461e+09,6.314048e+09,8.247421e+09,9.701073e+09,1.034036e+10
47997,2000,350,22,243.334,0.5070,1.431618,7.367692e-08,3.181432e-10,1.645051e-08,0.000000e+00,...,5.114645e+09,8.461385e+09,9.067507e+09,4.697605e+08,2.391724e+09,4.066925e+09,6.021224e+09,7.803936e+09,9.101644e+09,9.609184e+09
47998,2000,350,23,243.334,0.5070,1.431618,7.464950e-08,3.025912e-10,1.569808e-08,0.000000e+00,...,4.802473e+09,7.924662e+09,8.471231e+09,4.582664e+08,2.289063e+09,3.874296e+09,5.742989e+09,7.353551e+09,8.581193e+09,8.949526e+09


In [54]:
# Write the merged table to disk without the row index, then report the path.
out_dir = '/data/keeling/a/xx24/e/proj_ml'
out_csv = os.path.join(out_dir, 'merged_data.csv')
df.to_csv(out_csv, index=False)
print(f'Saved merged dataset to {out_csv}')

Saved merged dataset to /data/keeling/a/xx24/e/proj_ml/merged_data.csv
