In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Mount Google Drive at /content/drive so the dataset paths configured below
# (all located under /content/drive/MyDrive) can be resolved.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Root folder of the dataset on the mounted Drive.
data_dir = '/content/drive/MyDrive/NICOS_NN/SOLSTICIO2_0'

# Locations derived from the root: the folder holding the numbered spectrum
# .txt files, and the structured CSV that is loaded into `df`.
spectra_dir, output_csv_path = (
    os.path.join(data_dir, leaf)
    for leaf in ('espectros', 'merradf_output_structured.csv')
)

In [4]:
# Load the structured CSV; its text cells are parsed row by row further down.
df = pd.read_csv(filepath_or_buffer=output_csv_path)

In [5]:
# Number of rows of `df` to process; row i is paired with the file '<i + 1>.txt'.
# NOTE(review): hard-coded — presumably equal to len(df); confirm before reuse
# with another dataset.
n_spectras = 21_024

**WAY 1: SEPARATE COLUMNS FOR PL AND H2O**

In [None]:
# WAY 1 table: one row per spectrum, with PL, H2O and the spectrum values each
# collected in their own column.
data = {'PL[mbar]': [], 'H2O[vmr]': [], 'SPECTRA_Tb[K]': []}

# Column labels are looked up with a leading space, exactly as they appear in `df`.
pl_cells = df[' PL[mbar]']
h2o_cells = df[' H2O[vmr]']

for row in range(n_spectras):
    # Each cell holds text that eval() turns back into a Python object
    # (presumably a list of numbers — TODO confirm).
    # NOTE(review): eval() runs whatever expression the CSV contains; this is
    # only acceptable while the file is trusted. ast.literal_eval is the safer
    # choice if every cell is a plain literal.
    pl_values = eval(pl_cells.iloc[row])
    h2o_values = eval(h2o_cells.iloc[row])

    # Spectrum files are numbered from 1 while rows are numbered from 0.
    file_number = row + 1
    txt_path = os.path.join(spectra_dir, f'{file_number}.txt')
    file_found = os.path.exists(txt_path)
    if not file_found:
        print(f"File {file_number}.txt not found.")
    # Keep only the second column of the file; a missing file becomes a scalar NaN.
    spectrum = np.loadtxt(txt_path, usecols=1) if file_found else np.nan

    # Store the three values of this row together, after all of them were obtained.
    data['PL[mbar]'].append(pl_values)
    data['H2O[vmr]'].append(h2o_values)
    data['SPECTRA_Tb[K]'].append(spectrum)

In [None]:
# Turn the collected column lists into a DataFrame (one row per spectrum).
new_df = pd.DataFrame(data=data)

In [None]:
# Display the WAY 1 table for a visual check.
new_df

In [None]:
# Sanity check: print the length of the first row's entry in each column.
# len() fails on the SPECTRA entry if the first spectrum file was missing,
# because that entry is then a scalar NaN.
for column_label in ('PL[mbar]', 'H2O[vmr]', 'SPECTRA_Tb[K]'):
    print(len(new_df[column_label].iloc[0]))

In [None]:
# Save the WAY 1 table next to the source data. The path is built from
# `data_dir` instead of repeating the absolute Drive path, so the output
# location follows the configuration cell.
# NOTE(review): the 'SPECTRA_Tb[K]' cells hold NumPy arrays, which are
# presumably written in their str() form. NumPy abbreviates that form with
# '...' above its print threshold (1000 elements by default) — confirm the
# spectra survive a write/read round trip, or serialise them explicitly.
way1_csv_path = os.path.join(data_dir, 'input_nn_ver1_nicosv.csv')
new_df.to_csv(way1_csv_path, index=False)  # index=False: no row-index column

**WAY 2: PL AND H2O COMBINED IN A SINGLE COLUMN (ONE [PL, H2O] PAIR PER ROW)**

In [None]:
# WAY 2 table: one row per spectrum, with PL and H2O stored together as a
# two-element list [PL, H2O] in a single column, next to the spectrum values.
data = {'PL[mbar]_H2O[vmr]': [], 'SPECTRA_Tb[K]': []}

# Column labels are looked up with a leading space, exactly as they appear in `df`.
pl_cells, h2o_cells = df[' PL[mbar]'], df[' H2O[vmr]']

for row in range(n_spectras):
    # Each cell holds text that eval() turns back into a Python object
    # (presumably a list of numbers — TODO confirm).
    # NOTE(review): eval() runs whatever expression the CSV contains; this is
    # only acceptable while the file is trusted. ast.literal_eval is the safer
    # choice if every cell is a plain literal.
    pl_h2o_pair = [eval(pl_cells.iloc[row]), eval(h2o_cells.iloc[row])]

    # Spectrum files are numbered from 1 while rows are numbered from 0.
    file_number = row + 1
    txt_path = os.path.join(spectra_dir, f'{file_number}.txt')
    if not os.path.exists(txt_path):
        print(f"File {file_number}.txt not found.")
        spectrum = np.nan  # missing file: scalar NaN placeholder
    else:
        spectrum = np.loadtxt(txt_path, usecols=1)  # second column only

    # Store both values of this row together, after all of them were obtained.
    data['PL[mbar]_H2O[vmr]'].append(pl_h2o_pair)
    data['SPECTRA_Tb[K]'].append(spectrum)

In [None]:
# Turn the collected column lists into a DataFrame (one row per spectrum).
new_df = pd.DataFrame(data=data)

In [None]:
# Display the WAY 2 table for a visual check.
new_df

In [None]:
# Sanity check on the first row: size of the [PL, H2O] pair, then of each of
# its two members, then of the spectrum entry (one value per line).
first_pair = new_df['PL[mbar]_H2O[vmr]'].iloc[0]
print(len(first_pair), len(first_pair[0]), len(first_pair[1]), sep='\n')
print(len(new_df['SPECTRA_Tb[K]'].iloc[0]))

In [None]:
# Save the WAY 2 table next to the source data. The path is built from
# `data_dir` instead of repeating the absolute Drive path, so the output
# location follows the configuration cell.
# NOTE(review): the cells hold nested lists and NumPy arrays, which are
# presumably written in their str() form. NumPy abbreviates arrays with '...'
# above its print threshold (1000 elements by default) — confirm the spectra
# survive a write/read round trip, or serialise them explicitly.
way2_csv_path = os.path.join(data_dir, 'input_nn_ver2_nicosv.csv')
new_df.to_csv(way2_csv_path, index=False)  # index=False: no row-index column