In [28]:
import ast
import os

import numpy as np
import pandas as pd

In [29]:
# Colab-only step: mount Google Drive at /content/drive so the dataset
# folder configured below becomes readable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [30]:
# Root folder of the dataset on the mounted Drive.
data_dir = '/content/drive/MyDrive/NICOS_NN/SOLSTICIO2_0'
# Both inputs live directly under data_dir: the folder of per-spectrum text
# files and the structured CSV.
spectra_dir, output_csv_path = (
    os.path.join(data_dir, 'espectros'),
    os.path.join(data_dir, 'merradf_output_structured.csv'),
)

In [31]:
# Load the structured CSV. The columns read from it later in this notebook
# are named with a leading space (' PL[mbar]', ' H2O[vmr]').
df = pd.read_csv(output_csv_path)

In [32]:
# Number of spectra to process: CSV rows 0..n_spectras-1 are paired with the
# files 1.txt..n_spectras.txt inside spectra_dir.
# NOTE(review): hard-coded; presumably equal to len(df) — confirm.
n_spectras = 21024

**WAY 1: SEPARATE COLUMNS FOR PL AND H2O**

In [33]:
# Build one record per spectrum: the PL and H2O profiles parsed from the CSV
# row, plus the spectrum read from the matching text file. PL and H2O are kept
# in separate columns.
data = {'PL[mbar]': [], 'H2O[vmr]': [], 'SPECTRA_Tb[K]': []}

# Look the two text columns up once instead of on every iteration.
pl_column = df[' PL[mbar]']
h2o_column = df[' H2O[vmr]']

for i in range(n_spectras):
    # Each CSV cell holds a list literal stored as text. ast.literal_eval
    # parses literals only, so file content can never execute code the way
    # the built-in eval() would.
    pl_list = ast.literal_eval(pl_column.iloc[i])
    h2o_list = ast.literal_eval(h2o_column.iloc[i])

    # Spectrum files are numbered from 1, so CSV row i pairs with '<i + 1>.txt'.
    spectra_file_path = os.path.join(spectra_dir, f'{i + 1}.txt')
    if os.path.exists(spectra_file_path):
        spectra_data = np.loadtxt(spectra_file_path, usecols=1)  # keep only the second column
    else:
        print(f"File {i + 1}.txt not found.")
        # NOTE(review): a missing file is recorded as a scalar NaN, not an
        # array, so len() on that entry raises TypeError — confirm downstream
        # code filters these rows.
        spectra_data = np.nan

    # Append this spectrum's values to the column lists
    data['PL[mbar]'].append(pl_list)
    data['H2O[vmr]'].append(h2o_list)
    data['SPECTRA_Tb[K]'].append(spectra_data)

In [34]:
# One row per spectrum; every cell holds a whole sequence (the PL profile,
# the H2O profile, or the spectrum values).
new_df = pd.DataFrame(data)

In [35]:
# Bare last expression: renders the assembled frame as the cell output.
new_df

Unnamed: 0,PL[mbar],H2O[vmr],SPECTRA_Tb[K]
0,"[0.119703009724617, 0.1594950258731842, 0.2113...","[5.096061158837983e-06, 5.0830312829930335e-06...","[8.037348, 8.037801, 8.038254, 8.038707, 8.039..."
1,"[0.119703009724617, 0.1594950258731842, 0.2113...","[5.108040113555035e-06, 5.086026249045972e-06,...","[8.115931, 8.116404, 8.116878, 8.117351, 8.117..."
2,"[0.119703009724617, 0.1594950258731842, 0.2113...","[5.096061158837983e-06, 5.098753717902582e-06,...","[8.200232, 8.200725, 8.201219, 8.201713, 8.202..."
3,"[0.119703009724617, 0.1594950258731842, 0.2113...","[5.069857706985204e-06, 5.112978669785662e-06,...","[8.271919, 8.272431, 8.272943, 8.273455, 8.273..."
4,"[0.119703009724617, 0.1594950258731842, 0.2113...","[5.011459961679066e-06, 5.109235189593164e-06,...","[8.322955, 8.323478, 8.324002, 8.324525, 8.325..."
...,...,...,...
21019,"[0.119703009724617, 0.1594950258731842, 0.2113...","[6.901134838699363e-06, 6.914309324201895e-06,...","[16.40415, 16.40652, 16.40889, 16.41125, 16.41..."
21020,"[0.119703009724617, 0.1594950258731842, 0.2113...","[6.907124316057889e-06, 6.921795829839539e-06,...","[16.64495, 16.64736, 16.64976, 16.65217, 16.65..."
21021,"[0.119703009724617, 0.1594950258731842, 0.2113...","[6.914611276442884e-06, 6.9292827902245335e-06...","[16.96242, 16.96488, 16.96734, 16.9698, 16.972..."
21022,"[0.119703009724617, 0.1594950258731842, 0.2113...","[6.925092748133466e-06, 6.936769295862177e-06,...","[17.20668, 17.20919, 17.2117, 17.21421, 17.216..."


In [36]:
# Sanity check: length of the first row's entry in each column, in column order.
for column in ('PL[mbar]', 'H2O[vmr]', 'SPECTRA_Tb[K]'):
    first_entry = new_df[column].iloc[0]
    print(len(first_entry))

67
67
12001


In [37]:
# Sanity check: Python type of the first row's entry in each column, in column order.
for column in ('PL[mbar]', 'H2O[vmr]', 'SPECTRA_Tb[K]'):
    first_entry = new_df[column].iloc[0]
    print(type(first_entry))

<class 'list'>
<class 'list'>
<class 'numpy.ndarray'>


In [38]:
# Persist the separate-columns layout next to the source data. The path is
# derived from data_dir rather than repeated as a literal, so the output
# follows the dataset location if that setting changes; with the current
# data_dir it resolves to the same file as before.
new_df.to_pickle(os.path.join(data_dir, 'input_nn_ver1.pkl'))

**WAY 2: PL AND H2O COMBINED IN A SINGLE COLUMN OF [PL, H2O] PAIRS**

In [39]:
# Build one record per spectrum: the PL and H2O profiles combined into a
# single entry, plus the spectrum read from the matching text file.
data = {'PL[mbar]_H2O[vmr]': [], 'SPECTRA_Tb[K]': []}

# Look the two text columns up once instead of on every iteration.
pl_column = df[' PL[mbar]']
h2o_column = df[' H2O[vmr]']

for i in range(n_spectras):
    # Each CSV cell holds a list literal stored as text. ast.literal_eval
    # parses literals only, so file content can never execute code the way
    # the built-in eval() would.
    pl_list = ast.literal_eval(pl_column.iloc[i])
    h2o_list = ast.literal_eval(h2o_column.iloc[i])

    # Combine PL and H2O into one two-element list: [pl_list, h2o_list]
    pl_h2o_vector = [pl_list, h2o_list]

    # Spectrum files are numbered from 1, so CSV row i pairs with '<i + 1>.txt'.
    spectra_file_path = os.path.join(spectra_dir, f'{i + 1}.txt')
    if os.path.exists(spectra_file_path):
        spectra_data = np.loadtxt(spectra_file_path, usecols=1)  # keep only the second column
    else:
        print(f"File {i + 1}.txt not found.")
        # NOTE(review): a missing file is recorded as a scalar NaN, not an
        # array, so len() on that entry raises TypeError — confirm downstream
        # code filters these rows.
        spectra_data = np.nan

    # Append this spectrum's values to the column lists
    data['PL[mbar]_H2O[vmr]'].append(pl_h2o_vector)
    data['SPECTRA_Tb[K]'].append(spectra_data)

In [40]:
# One row per spectrum: the combined [PL, H2O] entry and the spectrum values.
new_df = pd.DataFrame(data)

In [41]:
# Bare last expression: renders the assembled frame as the cell output.
new_df

Unnamed: 0,PL[mbar]_H2O[vmr],SPECTRA_Tb[K]
0,"[[0.119703009724617, 0.1594950258731842, 0.211...","[8.037348, 8.037801, 8.038254, 8.038707, 8.039..."
1,"[[0.119703009724617, 0.1594950258731842, 0.211...","[8.115931, 8.116404, 8.116878, 8.117351, 8.117..."
2,"[[0.119703009724617, 0.1594950258731842, 0.211...","[8.200232, 8.200725, 8.201219, 8.201713, 8.202..."
3,"[[0.119703009724617, 0.1594950258731842, 0.211...","[8.271919, 8.272431, 8.272943, 8.273455, 8.273..."
4,"[[0.119703009724617, 0.1594950258731842, 0.211...","[8.322955, 8.323478, 8.324002, 8.324525, 8.325..."
...,...,...
21019,"[[0.119703009724617, 0.1594950258731842, 0.211...","[16.40415, 16.40652, 16.40889, 16.41125, 16.41..."
21020,"[[0.119703009724617, 0.1594950258731842, 0.211...","[16.64495, 16.64736, 16.64976, 16.65217, 16.65..."
21021,"[[0.119703009724617, 0.1594950258731842, 0.211...","[16.96242, 16.96488, 16.96734, 16.9698, 16.972..."
21022,"[[0.119703009724617, 0.1594950258731842, 0.211...","[17.20668, 17.20919, 17.2117, 17.21421, 17.216..."


In [42]:
# Sanity check on the first row: size of the combined entry, of each of its
# two members, and of the spectrum.
first_profile = new_df['PL[mbar]_H2O[vmr]'].iloc[0]
first_spectrum = new_df['SPECTRA_Tb[K]'].iloc[0]
print(len(first_profile))
print(len(first_profile[0]))
print(len(first_profile[1]))
print(len(first_spectrum))

2
67
67
12001


In [43]:
# Persist the combined-column layout next to the source data. The path is
# derived from data_dir rather than repeated as a literal, so the output
# follows the dataset location if that setting changes; with the current
# data_dir it resolves to the same file as before.
new_df.to_pickle(os.path.join(data_dir, 'input_nn_ver2.pkl'))