In [4]:
import tkinter as tk
from tkinter import ttk
from tkinter import filedialog
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.interpolate import interp1d
import numpy as np

matplotlib.use('TkAgg')  # select the Tk-based matplotlib backend
plt.ion()  # enable interactive mode for TkAgg backend

# Guard flag checked by the dialog cells below: they run only while it is False.
SKIP_CELL = False  # set to True to skip the cells that open the file/directory dialogs

In [5]:
def select_directory(root, close=True):
    """
    Open a directory selection dialog and return the selected path.

    Args:
        root: Tkinter root window used as the dialog's parent
        close: Destroy the root window once the dialog has finished, whether
            it returned normally or raised (default: True)

    Returns:
        The value returned by filedialog.askdirectory: the selected directory
        path, or an empty value if the dialog was cancelled
    """
    try:
        return filedialog.askdirectory(
            parent=root,
            initialdir=".",
            title="Select Directory"
        )
    finally:
        # Runs on the normal and the error path alike, so a failing dialog
        # cannot leave an orphaned Tk window behind.
        if close:
            root.destroy()

def select_files(root, close=True):
    """
    Open a multi-file selection dialog and return the selected paths.

    Args:
        root: Tkinter root window used as the dialog's parent
        close: Destroy the root window once the dialog has finished, whether
            it returned normally or raised (default: True)

    Returns:
        The value returned by filedialog.askopenfilenames, passed through
        unchanged: a sequence of the selected file paths, or an empty (falsy)
        value if the dialog was cancelled
    """
    try:
        return filedialog.askopenfilenames(
            parent=root,
            initialdir=".",
            title="Select Files",
            filetypes=(("All files", "*.*"),)
        )
    finally:
        # Runs on the normal and the error path alike, so a failing dialog
        # cannot leave an orphaned Tk window behind.
        if close:
            root.destroy()

def getdir():
    """
    Create a temporary Tk root, ask the user for a directory and return it.

    Returns:
        Whatever select_directory returns: the chosen directory path, or an
        empty value if the dialog was cancelled
    """
    root = tk.Tk()
    # Hide the otherwise empty root window so only the dialog is shown.
    root.withdraw()
    return select_directory(root, close=True)

def getfiles():
    """
    Create a temporary Tk root, ask the user for one or more files and return them.

    Returns:
        Whatever select_files returns: the selected file paths, or an empty
        value if the dialog was cancelled
    """
    root = tk.Tk()
    # Hide the otherwise empty root window so only the dialog is shown.
    root.withdraw()
    return select_files(root, close=True)


In [None]:
if not SKIP_CELL:
    d = getdir()  # run this cell to pick a directory; the chosen path is stored in d
    print(d)
    # NOTE(review): d is a single path string, so d[0] is only its first
    # character, not a file name -- confirm what substratefile should hold.
    # Guarded so that cancelling the dialog (empty result) yields None instead
    # of raising IndexError.
    substratefile = d[0] if d else None

In [None]:
if not SKIP_CELL:
    f = getfiles()  # run this cell to pick files; the selection is stored in f
    print(len(f), f)
    # The first selected file becomes the sample file. Cancelling the dialog
    # gives an empty selection, which yields None instead of an IndexError.
    samplefile = f[0] if f else None

In [None]:
# Newton spectrum reader (neglects header)
def get_spec_Netwtonformat(spec):
    """
    Read a tab-separated two-column spectrum file, skipping leading header rows.

    The file is parsed into two columns named 'Wavelength' and 'Intensity'.
    Data is taken to start at the first row whose wavelength field is a number
    and whose intensity field is not text (it may be missing). Everything
    before that row is discarded as header.

    Args:
        spec: Path or file-like object accepted by pandas.read_csv

    Returns:
        pd.DataFrame with numeric 'Wavelength' and 'Intensity' columns and a
        fresh 0-based index, or None (after printing a message) if no data
        row is found

    Raises:
        ValueError: if a row after the detected start contains non-numeric text
    """
    # Read the entire file first; header rows end up as text/NaN cells
    df_raw = pd.read_csv(spec, sep='\t', header=None, names=['Wavelength', 'Intensity'])

    # Coerce instead of raising: text and missing cells both become NaN here.
    wavelength_num = pd.to_numeric(df_raw['Wavelength'], errors='coerce')
    intensity_num = pd.to_numeric(df_raw['Intensity'], errors='coerce')

    # A data row needs a real wavelength. Checking only the first column is
    # not enough: a header row such as "<empty>\tnote" or "512\tpixels" would
    # be mistaken for data and break the strict conversion below.
    wavelength_ok = wavelength_num.notna()
    intensity_ok = intensity_num.notna() | df_raw['Intensity'].isna()
    data_positions = (wavelength_ok & intensity_ok).to_numpy().nonzero()[0]

    if len(data_positions) == 0:
        print("Could not find any numeric data in the file")
        return None

    # Keep everything from the first data row on (positional, not label-based)
    df = df_raw.iloc[data_positions[0]:].copy()

    # Strict conversion: stray text after the start is an error, not silently NaN
    df['Wavelength'] = pd.to_numeric(df['Wavelength'])
    df['Intensity'] = pd.to_numeric(df['Intensity'])

    return df.reset_index(drop=True)

In [None]:
def read_spectrum_txtfile(file_path):
    """
    Load a tab-delimited spectrum text file into a three-column DataFrame.

    The header is the first line that mentions 'WL', 'BG' and 'PL'. Every
    later line whose first three tab-separated fields parse as floats becomes
    one row; blank, short or non-numeric lines are skipped.

    Args:
        file_path: Path to the spectrum file

    Returns:
        pd.DataFrame: DataFrame with WL, BG, PL columns, or None (after
        printing a message) when no header line is found
    """
    column_names = ['WL', 'BG', 'PL']

    with open(file_path, 'r') as handle:
        all_lines = handle.readlines()

    # Position of the first line that mentions every column name
    header_pos = next(
        (pos for pos, text in enumerate(all_lines)
         if all(tag in text for tag in column_names)),
        None,
    )
    if header_pos is None:
        print(f"Could not find header line with WL, BG, PL columns in {file_path}")
        return None

    def _parse_row(text):
        # Return [WL, BG, PL] as floats, or None when the line is not a data row
        fields = [field.strip() for field in text.strip().split('\t')]
        if len(fields) < 3:
            return None
        try:
            return [float(fields[0]), float(fields[1]), float(fields[2])]
        except (ValueError, TypeError):
            return None

    rows = []
    for text in all_lines[header_pos + 1:]:
        parsed = _parse_row(text)
        if parsed is not None:
            rows.append(parsed)

    return pd.DataFrame(rows, columns=column_names)

In [None]:
# example usage
# 1: get the spectrum filename using getfiles()
# 2: read the file using read_spectrum_txtfile() or get_spec_Netwtonformat()
# 3: process the data in the pandas DataFrame