# Costruzione matrice per lavorare

Questo programma servirà esclusivamente per costruire e poi salvare la matrice originale "matrice_con_t.dat".
La matrice è fondamentale per eseguire e studiare la convoluzione con la SVD, quindi questo è il primo passo di preprocessing.
Gli spettri di tutte le temperature verranno semplicemente uniti in un'unica matrice.

In [None]:
import pandas as pd
import os
import glob

# Load every spectrum found in ./Dati and merge them into one table that has
# a shared "Wavelength" column plus one column per input file.

# Data folder, relative to the working directory of the container (where the
# Jupyter Notebook is started).
folder_path = os.path.join(os.getcwd(), "Dati")

# glob returns paths in arbitrary order; sorting makes the column order of
# the merged table reproducible from run to run.
file_list = sorted(glob.glob(os.path.join(folder_path, "*.txt")))
if not file_list:
    # Fail early with a clear message instead of an IndexError further down.
    raise FileNotFoundError(f"Nessun file .txt trovato in '{folder_path}'")

dfs = []

for file_path in file_list:
    # Find the line containing the "XYDATA" marker; the two-column table read
    # below starts on the line right after it. Iterating the file lazily
    # stops at the marker instead of loading the whole file into memory.
    with open(file_path, 'r') as file:
        start_idx = next(
            (i + 1 for i, line in enumerate(file) if "XYDATA" in line),
            None,
        )
    if start_idx is None:
        raise ValueError(f"Marcatore 'XYDATA' non trovato in '{file_path}'")

    # The value column is named after the file (base name without extension).
    df = pd.read_csv(
        file_path,
        sep="\t",
        skiprows=start_idx,
        header=None,
        usecols=[0, 1],
        names=["Wavelength", os.path.splitext(os.path.basename(file_path))[0]]
    )

    dfs.append(df)

# Inner-join all tables on "Wavelength": only wavelengths present in every
# file are kept.
merged_df = dfs[0]

for df in dfs[1:]:
    merged_df = pd.merge(merged_df, df, on="Wavelength", how='inner')

merged_df.head()

Unnamed: 0,Wavelength,CD_Tel23_37.5uM_T100,CD_Tel23_37.5uM_T19,CD_Tel23_37.5uM_T22,CD_Tel23_37.5uM_T24,CD_Tel23_37.5uM_T26,CD_Tel23_37.5uM_T28,CD_Tel23_37.5uM_T30,CD_Tel23_37.5uM_T32,CD_Tel23_37.5uM_T34,...,CD_Tel23_37.5uM_T80,CD_Tel23_37.5uM_T82,CD_Tel23_37.5uM_T84,CD_Tel23_37.5uM_T86,CD_Tel23_37.5uM_T88,CD_Tel23_37.5uM_T90,CD_Tel23_37.5uM_T92,CD_Tel23_37.5uM_T94,CD_Tel23_37.5uM_T96,CD_Tel23_37.5uM_T98
0,330.0,0.432024,0.535653,0.549668,0.547439,0.581914,0.563364,0.612426,0.496127,0.604874,...,0.411057,0.439534,0.435262,0.49714,0.479955,0.403012,0.441934,0.370971,0.431813,0.367093
1,329.5,0.425564,0.521705,0.556836,0.538156,0.581846,0.577134,0.60735,0.504032,0.585079,...,0.41418,0.473242,0.44132,0.474463,0.492177,0.403835,0.436586,0.383931,0.423019,0.348449
2,329.0,0.414526,0.531593,0.573395,0.517939,0.584496,0.570374,0.611856,0.501482,0.5714,...,0.406843,0.463936,0.429568,0.478443,0.473265,0.405304,0.436242,0.364719,0.410306,0.356284
3,328.5,0.394576,0.539488,0.584016,0.546564,0.594529,0.561401,0.623771,0.517139,0.549172,...,0.401213,0.467257,0.436169,0.47773,0.475877,0.389666,0.418678,0.353156,0.410643,0.332987
4,328.0,0.382177,0.52882,0.608973,0.553635,0.608059,0.540713,0.627271,0.533439,0.553204,...,0.412983,0.445359,0.445292,0.457624,0.499279,0.375435,0.40844,0.360081,0.405945,0.317857


Ora è necessario scrivere il DataFrame in un formato leggibile dal software successivo.

In [11]:
# Reorder the spectra columns by temperature and put a temperature row on top
# of the table.

# 1. Map every spectrum column to the integer that follows the last "_T" in
#    its name ("Wavelength" is skipped).
temp_dict = {}
for col in merged_df.columns:
    if col == "Wavelength":
        continue
    temp_dict[col] = int(col.rsplit('_T', 1)[-1])

# 2. Column names ordered by ascending temperature.
sorted_cols = sorted(temp_dict, key=temp_dict.get)

# 3. Final layout: "Wavelength" first, then the spectra in temperature order.
final_columns = ["Wavelength", *sorted_cols]
merged_df_sorted = merged_df[final_columns]

# 4. Temperature row: the literal 'Wavelength' label followed by one
#    temperature per spectrum column.
temperature_row = ['Wavelength']
temperature_row.extend(temp_dict[col] for col in sorted_cols)

# 5. Wrap that row in a one-row DataFrame and stack it above the data,
#    renumbering the index from zero.
temp_df = pd.DataFrame([temperature_row], columns=merged_df_sorted.columns)
merged_df_final = pd.concat(
    [temp_df, merged_df_sorted],
    ignore_index=True,
).reset_index(drop=True)
merged_df_final.head()

Unnamed: 0,Wavelength,CD_Tel23_37.5uM_T19,CD_Tel23_37.5uM_T22,CD_Tel23_37.5uM_T24,CD_Tel23_37.5uM_T26,CD_Tel23_37.5uM_T28,CD_Tel23_37.5uM_T30,CD_Tel23_37.5uM_T32,CD_Tel23_37.5uM_T34,CD_Tel23_37.5uM_T36,...,CD_Tel23_37.5uM_T82,CD_Tel23_37.5uM_T84,CD_Tel23_37.5uM_T86,CD_Tel23_37.5uM_T88,CD_Tel23_37.5uM_T90,CD_Tel23_37.5uM_T92,CD_Tel23_37.5uM_T94,CD_Tel23_37.5uM_T96,CD_Tel23_37.5uM_T98,CD_Tel23_37.5uM_T100
0,Wavelength,19.0,22.0,24.0,26.0,28.0,30.0,32.0,34.0,36.0,...,82.0,84.0,86.0,88.0,90.0,92.0,94.0,96.0,98.0,100.0
1,330.0,0.535653,0.549668,0.547439,0.581914,0.563364,0.612426,0.496127,0.604874,0.598425,...,0.439534,0.435262,0.49714,0.479955,0.403012,0.441934,0.370971,0.431813,0.367093,0.432024
2,329.5,0.521705,0.556836,0.538156,0.581846,0.577134,0.60735,0.504032,0.585079,0.595223,...,0.473242,0.44132,0.474463,0.492177,0.403835,0.436586,0.383931,0.423019,0.348449,0.425564
3,329.0,0.531593,0.573395,0.517939,0.584496,0.570374,0.611856,0.501482,0.5714,0.59114,...,0.463936,0.429568,0.478443,0.473265,0.405304,0.436242,0.364719,0.410306,0.356284,0.414526
4,328.5,0.539488,0.584016,0.546564,0.594529,0.561401,0.623771,0.517139,0.549172,0.574798,...,0.467257,0.436169,0.47773,0.475877,0.389666,0.418678,0.353156,0.410643,0.332987,0.394576


Ora, lo salviamo!

In [12]:
# Save the matrix to "matrice_con_t.dat" unless that file already exists: an
# existing file is left untouched. The message printed reflects what actually
# happened (previously "saved" was printed even when nothing was written).
output_path = "matrice_con_t.dat"

if not os.path.exists(output_path):
    # index=False, header=False: only the cell values are written.
    merged_df_final.to_csv(output_path, index=False, header=False)
    print("File salvato come 'matrice_con_t.dat' senza intestazione.")
else:
    print("Il file 'matrice_con_t.dat' esiste già: non è stato sovrascritto.")

File salvato come 'matrice_con_t.dat' senza intestazione.
