# Quick analysis

In [None]:
from analysis.plotting import darken, get_custom_colorpalette, get_default_plotly_layout
from analysis.load_spectra import load_chlamy_spectra
from scipy.signal import savgol_filter
import plotly.express as px


# Directory holding the Chlamydomonas spectra and the corresponding metadata
data_dir = '/Volumes/Microscopy/Wasatch-Raman_785/2025-03-12-ACDC/20250110TAPP15'

# Configure plotly layout
layout = get_default_plotly_layout()

# Parameters for baseline subtraction
lam = 1e4
wavenumber_range_cm1 = (520, 3200)

# Parameters for SNR calculation
noisy_region_cm1 = (1700, 2500)

# Parameters for peak finding, keyed by (instrument, excitation wavelength in nm)
relative_prominences = {
    ("horiba", 785): 0.5,
    ("renishaw", 785): 0.1,
    ("wasatch", 785): 0.1,
    ("openraman", 532): 0.5,
    ("wasatch", 532): 0.1,
}
peak_separation = 10

# Load all the spectra of Chlamydomonas and the corresponding metadata
chlamy_spectra, chlamy_dataframe = load_chlamy_spectra(data_dir)

# Preprocessing parameters
savgol_window_length = 9
savgol_polynomial_order = 3
fingerprint_region_cm1 = (300, 1800)

print(f"Total number of Chlamydomonas spectra: {len(chlamy_dataframe)}")

# One color for each well, in order of first appearance in the metadata table
well_ids = chlamy_dataframe.well_ID.unique()
num_wells = len(well_ids)
# Pass explicit, evenly spaced sample points instead of an integer count:
# the values are the same for two or more wells, and a single well maps to
# 0.0 instead of relying on the integer form, which spaces points using a
# division by (count - 1).
sample_points = [idx / max(num_wells - 1, 1) for idx in range(num_wells)]
color_palette = px.colors.sample_colorscale("Viridis", sample_points)
color_map = dict(zip(well_ids, color_palette))

# Show a reproducible sample of the spectra metadata (at most 10 rows, so
# small datasets do not raise "larger sample than population")
chlamy_dataframe.sample(min(10, len(chlamy_dataframe)), random_state=57)

In [None]:
from ramanalysis import RamanSpectrum
from pybaselines.whittaker import aspls
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
import numpy as np

# Load spectrum from Renishaw glass slide control
txt_filepath = "data/Renishaw_Qontor/glass_slide_background.txt"
glass_slide_control_spectrum = RamanSpectrum.from_renishaw_txtfile(txt_filepath)


def preprocess_chlamy_spectrum(raw_spectrum, instrument, wavelength_nm):
    """Run the preprocessing pipeline on a single raw spectrum.

    Steps: Savitzky-Golay smoothing, glass-slide background subtraction
    (Renishaw only), cropping to ``fingerprint_region_cm1`` (plus an extra
    crop to 0-1700 for the 532 nm Wasatch), ``aspls`` baseline subtraction,
    and ``RamanSpectrum.normalize()``.

    The smoothing, cropping and baseline parameters are read from the
    notebook-level settings (``savgol_window_length``,
    ``savgol_polynomial_order``, ``fingerprint_region_cm1``, ``lam``).

    Args:
        raw_spectrum: Spectrum exposing ``wavenumbers_cm1`` and ``intensities``.
        instrument: Value of the ``instrument`` metadata column for this spectrum.
        wavelength_nm: Value of the ``λ_nm`` metadata column for this spectrum.

    Returns:
        The preprocessed spectrum as a new ``RamanSpectrum``.
    """
    # 1) Denoising
    smoothed_intensities = savgol_filter(
        raw_spectrum.intensities,
        window_length=savgol_window_length,
        polyorder=savgol_polynomial_order,
    )
    # 1.5) Glass slide background subtraction for the Renishaw
    if instrument == "renishaw":
        # NOTE(review): element-wise subtraction assumes the control was
        # sampled on the same wavenumber axis as the spectrum — confirm.
        smoothed_intensities -= glass_slide_control_spectrum.intensities
    # 2) Crop spectral range to the fingerprint region
    cropped_spectrum = RamanSpectrum(
        wavenumbers_cm1=raw_spectrum.wavenumbers_cm1,
        intensities=smoothed_intensities,
    ).between(*fingerprint_region_cm1)
    # 2.5) Additional cropping for 532 nm Wasatch
    if (instrument == "wasatch") and (wavelength_nm == 532):
        cropped_spectrum = cropped_spectrum.between(0, 1700)
    # 3) Baseline subtraction
    baseline_estimate, _params = aspls(cropped_spectrum.intensities, lam=lam)
    baseline_subtracted_intensities = cropped_spectrum.intensities - baseline_estimate
    # 4) Compose into RamanSpectrum and normalize
    return RamanSpectrum(
        wavenumbers_cm1=cropped_spectrum.wavenumbers_cm1,
        intensities=baseline_subtracted_intensities,
    ).normalize()


# Store preprocessed spectra, one per metadata row and in the same row order.
# The row label `i` is used as a position into `chlamy_spectra`.
preprocessed_chlamy_spectra = [
    preprocess_chlamy_spectrum(
        chlamy_spectra[i],
        dataframe_row["instrument"],
        dataframe_row["λ_nm"],
    )
    for i, dataframe_row in chlamy_dataframe.iterrows()
]

# Create a single-panel figure for the raw spectra.
# The grid has one column, so only one column title is supplied.
fig = make_subplots(
    rows=1,
    cols=1,
    shared_xaxes=False,
    shared_yaxes=False,
    horizontal_spacing=0.02,
    column_titles=["Raw"],
)
# The height is set once, in the final layout call below
fig.update_layout(layout, width=1300)

# Rows to plot. To plot a subset instead (e.g. two spectra per well), use
# chlamy_dataframe.groupby('well_ID').head(2)
chlamy_dataframe_view = chlamy_dataframe

plotted_wells = []
for i, dataframe_row in chlamy_dataframe_view.iterrows():
    # Get the raw spectrum and its associated metadata
    raw_spectrum = chlamy_spectra[i]
    well_ID = dataframe_row["well_ID"]
    site = dataframe_row["site"]

    # Plot each raw spectrum, colored by well; the legend entry is added
    # separately below so that each well appears only once
    fig.add_trace(
        go.Scatter(
            x=raw_spectrum.wavenumbers_cm1,
            y=raw_spectrum.intensities,
            hovertext=f"{well_ID} | {site}",
            hoverinfo="text",
            legendgroup=well_ID,
            showlegend=False,
            marker={"color": color_map[well_ID]},
            opacity=0.5,
        ),
        row=1,
        col=1,
    )
    plotted_wells.append(well_ID)

plotted_wells = pd.Series(plotted_wells)
# Plot fake (empty) traces so the legend has exactly one entry per well
for well_ID in plotted_wells.unique():
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=[None],
            mode="lines",
            name=well_ID,
            line={"color": color_map[well_ID], "width": 3},
            legendgroup=well_ID,
            showlegend=True,
        )
    )

# Configure plotly layout
fig.update_layout(layout, height=600)
fig.update_xaxes(title_text="Wavenumber (cm<sup>-1</sup>)", row=1, col=1)
fig.update_yaxes(title_text="Intensity", row=1, col=1)
fig.show()


In [None]:
import plotly.express as px
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis



# Number of linear discriminants to keep
num_lda_components = 2

# Assign an integer class label to every well (order of first appearance)
group_labels = {
    well: label for label, well in enumerate(chlamy_dataframe.well_ID.unique())
}

# Restrict the analysis to the Wasatch 785 nm spectra
instrument = "wasatch"
wavelength_nm = 785
grp = chlamy_dataframe.query("instrument == @instrument & λ_nm == @wavelength_nm")

# Compose training data: one row of intensities per spectrum (X) and the
# label of the well it came from (y), grouped well by well
X, y = [], []
for _well_ID, label in group_labels.items():
    indices_in_group = grp.query("well_ID == @_well_ID").index.values
    X.extend(preprocessed_chlamy_spectra[i].intensities for i in indices_in_group)
    y.extend([label] * len(indices_in_group))
X = np.array(X)
y = np.array(y)

# Fit the LDA on the training data, then project the same data onto it
lda = LinearDiscriminantAnalysis(n_components=num_lda_components)
lda.fit(X, y)
spectral_components = lda.transform(X)
variances = lda.explained_variance_ratio_

# Collect the first two discriminants and the labels in a DataFrame for plotting
source = pd.DataFrame(spectral_components[:, :2], columns=["LD-1", "LD-2"])
source["label"] = y
# Map the integer labels back to well IDs for the legend
group_labels_inverted = {label: well for well, label in group_labels.items()}
source["Group"] = source["label"].map(group_labels_inverted)

# LDA scatter plot with box-plot marginals, colored per well
scatter_options = {
    "x": "LD-1",
    "y": "LD-2",
    "color": "Group",
    "color_discrete_map": color_map,
    "marginal_x": "box",
    "marginal_y": "box",
}
fig = px.scatter(source, **scatter_options)

# Configure plotly layout; axis titles report the explained variance ratio
ld1_title = f"LD-1 ({variances[0]:.1%})"
ld2_title = f"LD-2 ({variances[1]:.1%})"
fig.update_layout(
    layout,
    title=f"{instrument.capitalize()} | {wavelength_nm} nm",
    xaxis_title=ld1_title,
    yaxis_title=ld2_title,
    height=500,
    width=600,
)
fig.show()
