In [3]:
import pandas as pd
import xarray as xr
import os

In [None]:
# Scenario library layout: each key is a case sub-directory under `base_root`,
# each value is how many `scenario_NNNN` directories are iterated for that case.
cases = dict(
    case_1=800,
    case_2=400,
    case_3=400,
    case_4=200,
    case_5=100,
    case_6=100,
)

# Root directory that holds one sub-directory per case.
base_root = '/data/keeling/a/xx24/e/proj_ml/scenario_libs'

In [None]:
# Walk every scenario of every case and collect one record per output step.
#
# For each scenario directory `<base_root>/<case>/scenarios/scenario_NNNN/out_init`
# three kinds of NetCDF files are read:
#   * urban_plume_process.nc            - per-scenario time series (bulk aerosol quantities)
#   * urban_plume_0001_<suffix>.nc      - per-output state (time, day of year, temperature, RH)
#   * urban_plume_<suffix>_process.nc   - per-output `sc_dist` / `sc_varying_sigma_dist`
#
# Every dataset is opened in a `with` block so its file handle is released as
# soon as the needed values have been copied out; otherwise tens of thousands
# of files would stay open for the lifetime of the kernel.
all_data = []
global_id = 0  # scenario counter shared by all cases; incremented before use, so IDs start at 1

for case_name, n_scenarios in cases.items():
    case_path = os.path.join(base_root, case_name, 'scenarios')

    for local_idx in range(n_scenarios):
        scenario_dir = f'scenario_{local_idx:04d}'
        scenario_path = os.path.join(case_path, scenario_dir, 'out_init')

        # `.values` materialises each variable as a NumPy array, so the arrays
        # stay usable after the dataset is closed.
        with xr.open_dataset(os.path.join(scenario_path, 'urban_plume_process.nc')) as aero:
            # Drop the first record ([1:]; the original note says this keeps
            # hours 1-24) so that index k lines up with output file number k + 2.
            bc_arr    =   aero['tot_bc_mass_conc'  ][1:].values
            oc_arr    =   aero['tot_oc_mass_conc'  ][1:].values
            oin_arr   =   aero['tot_oin_mass_conc' ][1:].values
            # NaCl is reported as the sum of the Na and Cl mass concentrations.
            nacl_arr  = ( aero['tot_na_mass_conc'  ] + aero['tot_cl_mass_conc'] )[1:].values
            so4_arr   =   aero['tot_so4_mass_conc' ][1:].values
            soa_arr   =   aero['tot_soa_mass_conc' ][1:].values
            dens_arr  =   aero['density'           ][1:].values
            numc_arr  =   aero['tot_num_conc'      ][1:].values
            dpnum_arr =   aero['tot_dp_num_conc'   ][1:].values

        # Ratio of `tot_dp_num_conc` to `tot_num_conc`, stored as 'Bulk_DP'.
        bulk_dp_arr = dpnum_arr / numc_arr

        global_id += 1

        # Output files are numbered 00000002 .. 00000025 (24 files per scenario).
        for j in range(2, 26):
            idx = j - 2  # position of this output in the [1:]-sliced arrays
            suffix = f'{j:08d}'
            pre_path = os.path.join(scenario_path, f'urban_plume_0001_{suffix}.nc')
            ccn_path = os.path.join(scenario_path, f'urban_plume_{suffix}_process.nc')

            # Skip outputs for which either file is missing.
            if not os.path.exists(pre_path) or not os.path.exists(ccn_path):
                continue

            with xr.open_dataset(pre_path) as pre:
                # `time` is divided by 3600 and truncated to an integer hour
                # (presumably stored in seconds - confirm against the model output).
                time_hr = int(pre['time'].values / 3600)
                doy     = int(pre['start_day_of_year'].values)
                temp    = float(pre['temperature'].values)
                rh      = float(pre['relative_humidity'].values)

            bc_val      = float(bc_arr   [idx])
            oc_val      = float(oc_arr   [idx])
            oin_val     = float(oin_arr  [idx])
            nacl_val    = float(nacl_arr [idx])
            so4_val     = float(so4_arr  [idx])
            soa_val     = float(soa_arr  [idx])
            dens_val    = float(dens_arr [idx])
            numc_val    = float(numc_arr [idx])
            bulk_val    = float(bulk_dp_arr[idx])

            with xr.open_dataset(ccn_path) as ccn:
                ccs = ccn['sc_dist']
                cvs = ccn['sc_varying_sigma_dist']
                sc   = ccn['sc']
                # Sum each distribution over the bins whose `sc` coordinate is at
                # or below the threshold. The column labels (0.1, 0.3, 0.6, 1.0)
                # look like the same thresholds expressed in percent - confirm.
                ccn_cs_01 = float(ccs.sel(sc=sc<=0.001).sum())
                ccn_cs_03 = float(ccs.sel(sc=sc<=0.003).sum())
                ccn_cs_06 = float(ccs.sel(sc=sc<=0.006).sum())
                ccn_cs_1  = float(ccs.sel(sc=sc<=0.01 ).sum())
                ccn_vs_01 = float(cvs.sel(sc=sc<=0.001).sum())
                ccn_vs_03 = float(cvs.sel(sc=sc<=0.003).sum())
                ccn_vs_06 = float(cvs.sel(sc=sc<=0.006).sum())
                ccn_vs_1  = float(cvs.sel(sc=sc<=0.01 ).sum())

            row = {
                'Global_ID':         global_id,
                'DayofYear':         doy,
                'Time_hr':           time_hr,
                'Temperature':       temp,
                'RH':                rh,
                'Density':           dens_val,
                'Bulk_DP':           bulk_val,
                'BC':                bc_val,
                'OC':                oc_val,
                'OIN':               oin_val,
                'NaCl':              nacl_val,
                'SO4':               so4_val,
                'SOA':               soa_val,
                'Num_Conc':          numc_val,
                'CCN_CS_0.1':        ccn_cs_01,
                'CCN_CS_0.3':        ccn_cs_03,
                'CCN_CS_0.6':        ccn_cs_06,
                'CCN_CS_1.0':        ccn_cs_1,
                'CCN_VS_0.1':        ccn_vs_01,
                'CCN_VS_0.3':        ccn_vs_03,
                'CCN_VS_0.6':        ccn_vs_06,
                'CCN_VS_1.0':        ccn_vs_1,
            }
            all_data.append(row)



In [None]:
# Combine all collected rows into a single DataFrame and display it
# (the CSV export happens in a separate cell).
df = pd.DataFrame(all_data)
df

Unnamed: 0,Global_ID,DayofYear,Time_hr,Temperature,RH,Density,Bulk_DP,BC,OC,OIN,...,SOA,Num_Conc,CCN_CS_0.1,CCN_CS_0.3,CCN_CS_0.6,CCN_CS_1.0,CCN_VS_0.1,CCN_VS_0.3,CCN_VS_0.6,CCN_VS_1.0
0,1,334,1,236.748,0.567,1.471443,6.230634e-08,1.878388e-08,2.588518e-08,0.0,...,5.174586e-12,1.457665e+10,3.197213e+08,1.519017e+10,4.540427e+10,6.539842e+10,5.724752e+08,1.546061e+10,4.561667e+10,6.555437e+10
1,1,334,2,236.748,0.567,1.471443,6.125670e-08,2.801139e-08,3.862410e-08,0.0,...,1.449556e-11,2.048290e+10,4.951969e+08,2.134333e+10,5.934298e+10,9.109879e+10,9.066837e+08,2.182838e+10,6.057814e+10,9.157202e+10
2,1,334,3,236.748,0.567,1.471443,6.098563e-08,3.278056e-08,4.520658e-08,0.0,...,1.953506e-11,2.351051e+10,5.831595e+08,2.312941e+10,6.986333e+10,1.066106e+11,1.041252e+09,2.372651e+10,7.026248e+10,1.071496e+11
3,1,334,4,236.748,0.567,1.471443,6.185355e-08,3.769009e-08,5.205097e-08,0.0,...,2.381121e-11,2.624660e+10,6.440266e+08,2.708690e+10,8.052507e+10,1.187365e+11,1.197547e+09,2.763304e+10,8.181836e+10,1.195588e+11
4,1,334,5,236.748,0.567,1.471443,6.155550e-08,4.605387e-08,6.357667e-08,0.0,...,3.013101e-11,3.139562e+10,8.991524e+08,3.149941e+10,9.547120e+10,1.403756e+11,1.558382e+09,3.231951e+10,9.693371e+10,1.414322e+11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4795,200,350,20,243.334,0.507,1.431618,7.136350e-08,3.568246e-10,1.829328e-08,0.0,...,4.134487e-10,1.218734e+10,9.930432e+08,1.938815e+10,4.223954e+10,5.643048e+10,2.646979e+09,2.413009e+10,4.764581e+10,6.028261e+10
4796,200,350,21,243.334,0.507,1.431618,7.272676e-08,3.385150e-10,1.733479e-08,0.0,...,4.015244e-10,1.118244e+10,1.015139e+09,1.858686e+10,3.988561e+10,5.295590e+10,2.606675e+09,2.319383e+10,4.477262e+10,5.613450e+10
4797,200,350,22,243.334,0.507,1.431618,7.367692e-08,3.181432e-10,1.645051e-08,0.0,...,3.882045e-10,1.033560e+10,1.033987e+09,1.781788e+10,3.758268e+10,4.922461e+10,2.550180e+09,2.207804e+10,4.236508e+10,5.216519e+10
4798,200,350,23,243.334,0.507,1.431618,7.464950e-08,3.025912e-10,1.569808e-08,0.0,...,3.823352e-10,9.605736e+09,1.023579e+09,1.688331e+10,3.551538e+10,4.598761e+10,2.487782e+09,2.103232e+10,3.992008e+10,4.858412e+10


In [None]:
# Write the merged table to CSV; index=False leaves the DataFrame's row index
# out of the file.
out_csv = os.path.join('/data/keeling/a/xx24/e/proj_ml', 'merged_data.csv')
df.to_csv(out_csv, index=False)
print(f'Saved merged dataset to {out_csv}')

Saved merged dataset to /data/keeling/a/xx24/e/proj_ml/merged_data.csv
