# In [1]:
# Whittaker Smoother
import pandas as pd
import numpy as np
from scipy.sparse import diags
from scipy.sparse.linalg import spsolve

# Read the measurement table from the Kaggle input directory
data = pd.read_csv(
    filepath_or_buffer='/kaggle/input/blood-malaria/blood2hinfected532nm5pp.csv',
)

# Keep every column after the first one as the spectral data
# (the first column is carried over unchanged when the result is saved)
spectra = data.iloc[:, slice(1, None)]

def whittaker_smoother(data, lambda_, d_order=2):
    """Smooth every column of *data* with a Whittaker smoother.

    Each column ``y`` is replaced by the vector ``z`` that minimises
    ``sum((y - z) ** 2) + lambda_ * sum((D @ z) ** 2)``, where ``D`` is the
    ``d_order``-th order finite-difference operator.  Equivalently, ``z``
    solves the sparse linear system ``(I + lambda_ * D.T @ D) z = y``.

    Args:
        data: Signals to smooth, one signal per column with samples along
            the rows.  May be a ``pandas.DataFrame`` or any 2-D array-like
            of numbers; a 1-D sequence is treated as a single signal.
        lambda_: Non-negative smoothing strength.  ``0`` returns the input
            unchanged; larger values give a smoother result.
        d_order: Order of the difference penalty (non-negative integer).
            Polynomials of degree below ``d_order`` are left untouched.

    Returns:
        ``numpy.ndarray`` of floats with the same shape as *data* holding
        the smoothed signals.

    Raises:
        ValueError: If ``lambda_`` is negative, ``d_order`` is not a
            non-negative integer, or *data* is not 1-D or 2-D.
    """
    if lambda_ < 0:
        raise ValueError("lambda_ must be non-negative")
    if d_order < 0 or int(d_order) != d_order:
        raise ValueError("d_order must be a non-negative integer")
    d_order = int(d_order)

    signals = np.asarray(data, dtype=float)
    if signals.ndim not in (1, 2):
        raise ValueError("data must be 1-D or 2-D")
    columns = signals[:, np.newaxis] if signals.ndim == 1 else signals
    n, m = columns.shape

    # With no more samples than the difference order there is nothing to
    # penalise, so the smoothed signal is the input itself.
    if m == 0 or n <= d_order:
        return signals.copy()

    # One row of D: the d_order-th forward-difference stencil, e.g.
    # [1, -2, 1] for d_order == 2.
    stencil = np.diff(np.eye(d_order + 1), n=d_order, axis=0)[0]

    # D has shape (n - d_order, n) so that no truncated stencil rows appear
    # at the boundary (those would wrongly penalise the end samples).
    diff_op = diags(
        stencil.tolist(),
        list(range(d_order + 1)),
        shape=(n - d_order, n),
        format='csc',
    )

    # The system matrix does not depend on the signal, so build it once and
    # solve for all columns together.
    system = (diags(np.ones(n), 0, format='csc')
              + lambda_ * (diff_op.T @ diff_op)).tocsc()
    smoothed = spsolve(system, columns)

    # spsolve flattens a single-column right-hand side; restore the shape.
    return np.asarray(smoothed, dtype=float).reshape(signals.shape)

# Smoothing strength passed to the Whittaker smoother
lambda_ = 1e6

# Treat the smoother output as the baseline and subtract it from the spectra
baseline_corrected_spectra = (
    spectra - whittaker_smoother(spectra, lambda_)
)

# Put the untouched first column back in front of the corrected spectra
corrected_data = pd.concat(
    objs=[data.iloc[:, 0], baseline_corrected_spectra],
    axis='columns',
)

# Write the result to a new CSV file without the row index
corrected_data.to_csv(
    path_or_buf='whittaker_smoothing_data.csv',
    index=False,
)