In [3]:
import pandas as pd

# STEP 1: Load the raw data file
# We start by importing the raw .csv file generated from the CARY 50 UV-Vis spectrophotometer.
df = pd.read_csv("assay6.csv") 


# STEP 2: Drop the header row with "Wavelength (nm)", "Abs", etc.
# The first row contains column labels like "Wavelength (nm)" and "Abs". It's not actual data, so we remove it.
df = df.drop(index=0).reset_index(drop=True)
df.head()

# STEP 3: Convert all data to numeric (ignore non-numeric errors)
# We convert all values in the dataset to numeric values. Any non-numeric content (like empty strings or stray text) is automatically converted to NaN and ignored.
df = df.apply(pd.to_numeric, errors='coerce')
 
# STEP 4: Set the wavelength cutoff range
# We set the minimum and maximum wavelengths we want to keep — between 400 nm and 600 nm.
lower_bound = 400
upper_bound = 600

# STEP 5: Initialize a list to hold all cleaned sample data
# We’ll use this to store each sample's filtered result.
filtered_samples = []

# STEP 6: Loop through each pair of columns (Wavelength + Absorbance)
# The data is organized in pairs of columns: one column for wavelength, and the next for absorbance.
# We loop through these pairs, one sample at a time.
for i in range(0, df.shape[1], 2):
    try:
        # Extract columns
        wavelength_col = df.iloc[:, i]
        absorbance_col = df.iloc[:, i + 1]

        # Apply filtering
        # We filter each sample to include only the rows where wavelength is between 400 and 600 nm.
        mask = (wavelength_col >= lower_bound) & (wavelength_col <= upper_bound)
        filtered_wavelength = wavelength_col[mask].reset_index(drop=True)
        filtered_absorbance = absorbance_col[mask].reset_index(drop=True)

        # Combine into one sample DataFrame
        # We then combine those filtered results into a clean format:
        sample_df = pd.DataFrame({
            f'Wavelength_{i//2 + 1}': filtered_wavelength,
            f'Absorbance_{i//2 + 1}': filtered_absorbance
        })

        filtered_samples.append(sample_df)
        
    # If a sample is broken or incomplete, we safely skip it:
    except Exception as e:
        print(f"⚠️ Skipping columns {i} and {i+1}: {e}")

# STEP 7: Combine all filtered samples into one final DataFrame
# We put all the cleaned and filtered samples side-by-side into one final table.
result_df = pd.concat(filtered_samples, axis=1)

# STEP 8: Save to CSV
result_df.to_csv("truncated data set.csv", index=False)

print("✅ All samples processed and saved to 'truncated data set.csv'")


⚠️ Skipping columns 480 and 481: single positional indexer is out-of-bounds
✅ All samples processed and saved to 'truncate data set.csv'
