In [1]:
import csv

# Peak detection
def detect_peaks(wavelengths, intensities, threshold_intensity):
    """Return the (wavelength, intensity) pairs whose intensity exceeds the threshold.

    The two sequences are walked in lockstep, so pairing stops at the end of
    the shorter one. This is a plain threshold filter: every point strictly
    above ``threshold_intensity`` is kept, in input order; no local-maximum
    test is applied.

    Args:
        wavelengths: Sequence of wavelength values.
        intensities: Sequence of intensity values, parallel to ``wavelengths``.
        threshold_intensity: Exclusive lower bound on the intensity to keep.

    Returns:
        list of ``(wavelength, intensity)`` tuples.
    """
    kept = []
    for position, level in zip(wavelengths, intensities):
        if level > threshold_intensity:
            kept.append((position, level))
    return kept

# Assigning characteristics based on article
def assign_characteristics(significant_peaks, database):
    """Label each peak with the database entry for its rounded wavelength.

    Every peak wavelength is rounded to the nearest integer with ``round`` and
    that integer is used as the lookup key. Peaks without a matching key are
    still reported, labelled with the text "No data available".

    Args:
        significant_peaks: Iterable of ``(wavelength, intensity)`` pairs.
        database: Mapping from wavelength to a characteristics value.

    Returns:
        list of ``(rounded_wavelength, intensity, characteristics)`` tuples,
        one per input peak and in the same order.
    """
    labelled = []
    for position, level in significant_peaks:
        key = round(position)
        label = database[key] if key in database else "No data available"
        labelled.append((key, level, label))
    return labelled

# Load peak assignments; a wavelength range is kept as its midpoint instead of being discarded
def _parse_wavelength(text):
    """Convert a wavelength cell to a float, averaging the bounds of a range."""
    text = text.strip()
    try:
        # Plain numbers first, so "-5" or "1e-3" are never mistaken for a range
        return float(text)
    except ValueError:
        pass
    # En dash is tried first (the original format); em dash and ASCII hyphen
    # are accepted as alternative range separators.
    for separator in ('–', '—', '-'):
        bounds = text.split(separator)
        if len(bounds) != 2:
            continue
        try:
            return (float(bounds[0].strip()) + float(bounds[1].strip())) / 2
        except ValueError:
            continue
    raise ValueError(f"not a wavelength or wavelength range: {text!r}")


def load_peak_assignments(filepath):
    """Read a wavelength -> characteristics lookup table from a CSV file.

    The first row is treated as a header and skipped. In every other row,
    column 0 holds either a single wavelength ("1605") or a range
    ("1600–1610") and column 1 holds the characteristics text. A range is
    stored under the midpoint of its two bounds; the separator may be an en
    dash, an em dash, or an ASCII hyphen. Note that a range whose midpoint is
    not a whole number (e.g. 1602.5) cannot be matched by a lookup that uses
    an integer-rounded wavelength.

    Blank rows are skipped silently. Rows with fewer than two columns, or
    whose wavelength cannot be parsed, are reported with ``print`` and
    skipped, so one malformed row does not abort the load. An empty file
    yields an empty dict.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        dict mapping each wavelength (float) to its characteristics string.
        When the same wavelength occurs more than once, the last row wins.

    Raises:
        OSError: If the file cannot be opened.
    """
    assignments = {}
    with open(filepath, newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # Skip header row; tolerate a completely empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines
                continue
            if len(row) < 2:
                print(f"Ignoring row: {row}. Expected a wavelength and a characteristics column.")
                continue
            try:
                wavelength = _parse_wavelength(row[0])
            except ValueError:
                print(f"Ignoring row: {row}. Could not convert wavelength to float.")
                continue
            assignments[wavelength] = row[1]
    return assignments


# Load spectra data from CSV file
def load_spectra(filepath, *, wavelength_column=1, intensity_column=2):
    """Read parallel wavelength and intensity lists from a CSV file.

    The first row is treated as a header and skipped. Blank rows are skipped;
    an empty file yields two empty lists. Every other row must contain numeric
    text in the two selected columns.

    Args:
        filepath: Path of the CSV file to read.
        wavelength_column: Zero-based index of the wavelength column
            (default 1).
        intensity_column: Zero-based index of the intensity column
            (default 2).

    Returns:
        Tuple ``(wavelengths, intensities)`` of two equally long lists of
        floats, in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank row has too few columns.
        ValueError: If a selected cell is not a valid float.
    """
    wavelengths = []
    intensities = []
    with open(filepath, newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # Skip header row; tolerate a completely empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing newline)
                continue
            # Convert both cells before appending so the lists stay in step
            wavelength = float(row[wavelength_column])
            intensity = float(row[intensity_column])
            wavelengths.append(wavelength)
            intensities.append(intensity)
    return wavelengths, intensities

# Input locations under /kaggle/input
peak_assignments_filepath = "/kaggle/input/peak-assignments/peak_assignments.csv"
spectra_filepath = "/kaggle/input/smooth-malaria/smoothed_spectra.csv"

# Read the lookup table first, then the spectrum to annotate
peak_assignments = load_peak_assignments(peak_assignments_filepath)
wavelengths, intensities = load_spectra(spectra_filepath)

# Only points strictly above this intensity are kept
threshold_intensity = 20

# Filter the spectrum by intensity, then label whatever survived
significant_peaks = detect_peaks(wavelengths, intensities, threshold_intensity)
assigned_characteristics = assign_characteristics(significant_peaks, peak_assignments)

# Report only the peaks that matched an entry in the lookup table
print("Assigned characteristics:")
for wavelength, intensity, characteristics in assigned_characteristics:
    if characteristics == "No data available":
        continue
    print(f"Wavelength: {wavelength}, Intensity: {intensity}, Characteristics: {characteristics}")

Assigned characteristics:
Wavelength: 1610, Intensity: 47.09619381459333, Characteristics: Cytosine (NH2)
Wavelength: 1609, Intensity: 60.66880166049444, Characteristics: Cytosine (NH2)
Wavelength: 1608, Intensity: 66.87071917167378, Characteristics: Cytosine (NH2)
Wavelength: 1605, Intensity: 69.75147796054445, Characteristics: Phenylalanine, tyrosine, C=C (protein) 
Wavelength: 1603, Intensity: 62.92407321285046, Characteristics: Ring C-C stretch of phenyl 
Wavelength: 1602, Intensity: 51.688499860766264, Characteristics: d(C=C), phenylalanine (protein assignment)
Wavelength: 1560, Intensity: 33.102242667232055, Characteristics: Tryptophan
Wavelength: 1558, Intensity: 40.087073640103064, Characteristics: Tyrosine, amide II, COO
Wavelength: 1554, Intensity: 51.19992524084947, Characteristics: Amide II
Wavelength: 1552, Intensity: 53.72538885193948, Characteristics: n(C=C), porphyrin 
Wavelength: 1548, Intensity: 56.787014628504906, Characteristics: Tryptophan
Wavelength: 1546, Intensi