# 

# AutoOpenRaman acquisition and analysis

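This notebook demonstrates a basic analysis of multi-position data generated using AutoOpenRaman: it loads the spectrum recorded at each site, preprocesses it, plots a random sample of the processed spectra, and decomposes the full set with NMF.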

In [None]:
import pathlib
import shutil
import zipfile

import pandas as pd
import ramanspy as rp


def parse_autoopenraman_csv(csv_filename):
    """Load one AutoOpenRaman CSV file as a ``ramanspy`` spectrum.

    The spectral axis is taken from the "Wavenumber (cm-1)" column when the
    file has one, and from the "Pixel" column otherwise. Intensities are read
    from the "Intensity" column.

    Args:
        csv_filename: Path of the CSV file; handed directly to ``pd.read_csv``.

    Returns:
        An ``rp.Spectrum`` built from the intensity and spectral-axis columns.

    Raises:
        ValueError: If the file has neither a "Wavenumber (cm-1)" nor a
            "Pixel" column.
        KeyError: If the file has no "Intensity" column.
    """
    table = pd.read_csv(csv_filename)

    # Axis columns in priority order: wavenumbers are preferred over pixel indices.
    axis_column = next(
        (name for name in ("Wavenumber (cm-1)", "Pixel") if name in table.columns),
        None,
    )
    if axis_column is None:
        raise ValueError("No valid spectral axis found in the CSV file.")

    axis_values = table[axis_column]
    intensities = table["Intensity"]
    return rp.Spectrum(intensities, axis_values)


def get_wavenumber_axis(csv_filename):
    """Return the spectral axis of the spectrum stored in ``csv_filename``.

    The file is parsed with ``parse_autoopenraman_csv``. Despite this
    function's name, the returned axis holds pixel indices when the CSV has
    no "Wavenumber (cm-1)" column.
    """
    spectrum = parse_autoopenraman_csv(csv_filename)
    return spectrum.spectral_axis


def get_background_spectrum(bg_csv_filename, reference_dir=None):
    """Load a background spectrum and re-label it with the sample spectra's axis.

    The background intensities are kept as they are; only the spectral axis is
    replaced by the axis of one reference CSV file from ``reference_dir``.
    No interpolation or resampling is performed, so the background file
    presumably has to contain the same number of points as the reference file
    (NOTE(review): confirm how ``rp.Spectrum`` handles a length mismatch).

    Args:
        bg_csv_filename: Path of the background CSV file.
        reference_dir: Directory holding the sample CSV files whose axis
            should be used. Defaults to the module-level
            ``well_plate_data_dir``, which must be defined before the call.

    Returns:
        An ``rp.Spectrum`` with the background intensities and the reference
        file's spectral axis.

    Raises:
        FileNotFoundError: If ``reference_dir`` contains no ``.csv`` file.
    """
    if reference_dir is None:
        reference_dir = well_plate_data_dir

    # iterdir() yields entries in arbitrary order, so pick the smallest path to
    # get the same reference file on every run and every machine.
    reference_file = min(
        (path for path in pathlib.Path(reference_dir).iterdir() if path.suffix == ".csv"),
        default=None,
    )
    if reference_file is None:
        raise FileNotFoundError(f"No .csv files found in {reference_dir}")

    bg_spectrum = parse_autoopenraman_csv(bg_csv_filename)
    wavenumber_axis = get_wavenumber_axis(reference_file)
    adjusted_background = rp.Spectrum(bg_spectrum.spectral_data, wavenumber_axis)
    return adjusted_background


# Locations of the zipped dataset and of the files it unpacks to
zip_file = pathlib.Path("data/automated-chlamy-data.zip")
data_dir = pathlib.Path("data/automated-chlamy-data")
well_plate_data_dir = data_dir.joinpath("chlamyparentplate1")
dark_control = data_dir.joinpath("chlamy-dark.csv")

# Unpack the whole archive into data_dir
with zipfile.ZipFile(zip_file) as archive:
    archive.extractall(path=data_dir)

# Load the dark-control spectrum as the background. This must come after
# well_plate_data_dir is assigned, because get_background_spectrum looks up
# its reference CSV file in that directory.
dark_control_spectrum = get_background_spectrum(dark_control)

# Preprocessing steps, listed in the order they are given to the pipeline:
# dark-background subtraction, cropping to the (900, 1900) region,
# Whitaker-Hayes despiking, Savitzky-Golay denoising, ASPLS baseline
# correction and min-max normalisation.
preprocessing_steps = [
    rp.preprocessing.misc.BackgroundSubtractor(background=dark_control_spectrum),
    rp.preprocessing.misc.Cropper(region=(900, 1900)),
    rp.preprocessing.despike.WhitakerHayes(),
    rp.preprocessing.denoise.SavGol(window_length=5, polyorder=3),
    rp.preprocessing.baseline.ASPLS(lam=1e5),
    rp.preprocessing.normalise.MinMax(),
]
rp_pipeline = rp.preprocessing.Pipeline(preprocessing_steps)

# Collect one record per CSV file found in the well-plate directory
data = []

for csv_path in pathlib.Path(well_plate_data_dir).iterdir():
    # Skip anything that is not a CSV file
    if csv_path.suffix != ".csv":
        continue

    # The well is the text before the first "-" in the filename; the site is
    # the second "_"-separated token of the part that follows it
    name_parts = csv_path.name.split("-")
    well = name_parts[0]
    site = name_parts[1].split("_")[1]

    # Load the raw spectrum, then run it through the preprocessing pipeline
    spectrum = parse_autoopenraman_csv(csv_path)
    processed_spectrum = rp_pipeline.apply(spectrum)

    record = {"well": well, "site": site, "raw": spectrum, "processed": processed_spectrum}
    data.append(record)

# One row per file: well, site, raw spectrum and processed spectrum
df = pd.DataFrame(data)

## Plot representative spectra

In [None]:
import arcadia_pycolor as apc
import matplotlib.pyplot as plt

# Configure matplotlib through arcadia_pycolor
apc.mpl.setup()

# Draw up to 10 processed spectra at random. The cap at len(df) keeps this
# cell working when fewer than 10 spectra were loaded; df.sample(10) would
# raise a ValueError in that case.
n_representative = min(10, len(df))
random_spectra = df.sample(n_representative)["processed"].to_list()

# Stack every processed spectrum into one container for the analysis below
all_spectra_container = rp.SpectralContainer.from_stack(df["processed"].to_list())

plt.figure(figsize=(6, 8))
rp.plot.spectra(random_spectra, plot_type="single stacked")

## Perform NMF on all collected spectra

In [None]:
# Decompose the full set of processed spectra into 5 NMF components
nmf = rp.analysis.decompose.NMF(n_components=5, max_iter=5000)
projections, components = nmf.apply(all_spectra_container)

# Order the components from largest to smallest maximum value for plotting.
# NOTE: projections is not reordered, so after this line it no longer lines
# up with components by index.
components = sorted(components, key=lambda component: component.max(), reverse=True)
component_labels = [f"Component {rank}" for rank in range(1, len(components) + 1)]

plt.figure(figsize=(12, 4))
ax = rp.plot.spectra(
    components,
    all_spectra_container.spectral_axis,
    plot_type="single stacked",
    label=component_labels,
    title="NMF components",
)
# Drop the legend that the plot call attached to the axes
ax.legend_.remove()

In [None]:
# Remove the temporary data directory. The existence check lets this cell be
# re-run after the directory is already gone without raising FileNotFoundError.
if data_dir.exists():
    shutil.rmtree(data_dir)