## Purpose

The goal is to import several Raman/SERS measurements of different analytes at different concentrations, clean up the data, and then place the spectra in a dataframe that holds the intensity values along with the analyte and concentration.

Once the data is imported, it needs to be cleaned up. The data needs to be baseline corrected, filtered, and interpolated to have values at whole-number wavenumbers.

Note: different settings, different Raman spectrometers, and different SERS substrates were used for the measurements.


## Libraries

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

import scipy 
from scipy import interpolate #Need to get values at whole numbers
from scipy.signal import savgol_filter #need to filter spectrum to help clean it up
from BaselineRemoval import BaselineRemoval # https://pypi.org/project/BaselineRemoval/



## The Data

In [2]:
# Locate the main spectra directory and collect the path of every sample
# sub-folder inside it (one folder per analyte/concentration).
Path = '/Users/JRav/Documents/Learning/Learning software/Python/Personal/Spectra Files'

# Raw directory listing, printed so stray entries are easy to spot.
entries = os.listdir(Path)
print(entries)

# Keep only real sub-folders. This drops '.DS_Store' (and any other loose
# file) without raising when such an entry is absent, which a hard-coded
# list.remove('.DS_Store') would do.
filelist = [entry for entry in entries
            if os.path.isdir(os.path.join(Path, entry))]
print(filelist)

# Full path to each sample sub-folder, in the same order as filelist.
Path2 = [os.path.join(Path, entry) for entry in filelist]

['Thiram 10ppm', 'Thiabendazole 1ppm', 'BP4T 10-3M', '.DS_Store', 'Water', 'Rhodamine 6G 10-8M', 'Rhodamine 6G 10-6M', 'BP4T 10-4M', 'Thiram 1ppm', 'Rhodamine 6G 10-5M', 'Thiram 100ppm', 'Thiram 1000ppm', 'Malachite Green 1ppm', 'Chlorpyrifos 100ppm', 'Chlorpyrifos 1ppm', 'Oxycodone 100ppm', 'BP4T 10-6M', 'Thiram 0.1ppm', 'Rhodamine 6G 10-7M', 'BP4T 10-5M', 'Chlorpyrifos 1000ppm', 'Cocaine 100ppm']
['Thiram 10ppm', 'Thiabendazole 1ppm', 'BP4T 10-3M', 'Water', 'Rhodamine 6G 10-8M', 'Rhodamine 6G 10-6M', 'BP4T 10-4M', 'Thiram 1ppm', 'Rhodamine 6G 10-5M', 'Thiram 100ppm', 'Thiram 1000ppm', 'Malachite Green 1ppm', 'Chlorpyrifos 100ppm', 'Chlorpyrifos 1ppm', 'Oxycodone 100ppm', 'BP4T 10-6M', 'Thiram 0.1ppm', 'Rhodamine 6G 10-7M', 'BP4T 10-5M', 'Chlorpyrifos 1000ppm', 'Cocaine 100ppm']


'/Users/JRav/Documents/Learning/Learning software/Python/Personal/Spectra Files/Thiabendazole 1ppm'

## Preprocessing Parameters

In [40]:
# Savitzky-Golay smoothing settings applied to the raw spectra to reduce
# noise: polynomial order and window length (in points).
SGpoly, SGframe = 3, 15

# Baseline correction parameters
# (none are defined in this cell)

# Only the Raman 'molecular fingerprint' region, 600-1800 cm^-1, is kept.
WaveNumLow, WaveNumHigh = 600, 1800

# One grid point per whole wavenumber, both end points included.
WaveNumbers = np.arange(WaveNumLow, WaveNumHigh + 1, 1)

type(WaveNumbers)

numpy.ndarray

# Preprocessing functions

## Savitzky-Golay filter function

In [4]:
def SG(y0, SGframe, SGpoly):
    """Smooth an intensity trace with a Savitzky-Golay filter.

    Parameters
    ----------
    y0 : array-like
        Intensity values to smooth.
    SGframe : int
        Filter window length, passed to ``savgol_filter`` as
        ``window_length``.
    SGpoly : int
        Order of the fitted polynomial, passed as ``polyorder``.

    Returns
    -------
    The filtered values as returned by ``scipy.signal.savgol_filter``.
    """
    return savgol_filter(y0, SGframe, SGpoly)

## Baseline correction function

In [5]:
def Baseline(spectrum):
    """Return the baseline-corrected version of ``spectrum``.

    The correction is delegated to the ``ZhangFit`` method of the
    ``BaselineRemoval`` package, called with its default settings.

    Parameters
    ----------
    spectrum : array-like
        Intensity values handed to ``BaselineRemoval``.

    Returns
    -------
    Whatever ``BaselineRemoval(spectrum).ZhangFit()`` returns.
    """
    return BaselineRemoval(spectrum).ZhangFit()

## Interpolation function

In [6]:
def Interpolation(x0,WaveNumbers,y):
    """Resample a spectrum onto ``WaveNumbers`` with an interpolating spline.

    Parameters
    ----------
    x0 : array-like
        Measured wavenumber axis. It may be in ascending or descending
        order; it is sorted internally before the spline is fitted.
    WaveNumbers : array-like
        Wavenumbers at which intensities are wanted.
    y : array-like
        Intensities measured at ``x0`` (same length as ``x0``).

    Returns
    -------
    numpy.ndarray
        Spline values evaluated at ``WaveNumbers``.

    Notes
    -----
    ``s=0`` makes the spline pass through every data point (no smoothing).
    Requested wavenumbers outside the measured range are extrapolated by
    ``splev`` rather than rejected, so check that each spectrum covers the
    target grid.
    """
    x = np.asarray(x0, dtype=float)
    values = np.asarray(y, dtype=float)

    # splrep only accepts ascending x. A spectrum stored high-to-low would
    # otherwise raise "Error on input data", so order both arrays by
    # wavenumber first. Already-ascending input is left unchanged.
    order = np.argsort(x, kind='stable')

    tck = interpolate.splrep(x[order], values[order], s=0)
    return interpolate.splev(WaveNumbers, tck, der=0)

# Importing data and preprocessing

In [42]:
# Read every '.txt' spectrum in every sample folder, preprocess it, and
# collect the results into two dataframes with one row per spectrum and one
# column per whole wavenumber:
#   RawSpectra - interpolated only
#   Spectra    - filtered, baseline corrected, then interpolated
# AnalyteList / ConcentrationList hold the labels parsed from each file name,
# in the same row order as the dataframes.
AnalyteList = []
ConcentrationList = []

# Rows are gathered in plain lists and each dataframe is built once at the
# end; adding one column per spectrum to a DataFrame fragments it and gets
# slower with every file.
raw_rows = []
clean_rows = []

for i in Path2:
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order reproducible between runs and machines.
    for j in sorted(os.listdir(i)):

        # Only spectra files are of interest; a file literally named '.txt'
        # carries no label and is skipped as well.
        if not j.endswith('.txt') or j == '.txt':
            continue

        name = os.path.join(i, j)
        file = pd.read_csv(name, sep = '\t', header = None, names = ['Wavenumber', 'Intensity'])
        x0 = file['Wavenumber']
        y0 = file['Intensity']

        #filtering the spectra
        y1 = SG(y0, SGframe, SGpoly)

        #baseline correcting the spectra
        y2 = Baseline(y1)

        #getting intensity values at each whole wavenumber, rounded to 1 decimal
        y3 = np.around(Interpolation(x0,WaveNumbers,y2),1)
        yRaw = np.around(Interpolation(x0,WaveNumbers,y0),1)

        raw_rows.append(yRaw)
        clean_rows.append(y3)

        # Label = file name without extension, up to the first underscore.
        # NOTE(review): assumes the label reads '<analyte> <concentration>'
        # with the concentration as the LAST word, so multi-word analytes
        # stay intact - confirm against the actual file names.
        tokens = os.path.splitext(j)[0].split('_')[0].split()
        if len(tokens) >= 2:
            AnalyteList.append(' '.join(tokens[:-1]))
            ConcentrationList.append(tokens[-1])
        else:
            # No concentration in the name: keep the rows and labels aligned
            # instead of failing part-way through the import.
            AnalyteList.append(' '.join(tokens))
            ConcentrationList.append('')

# Number of spectra imported (kept for any later cell that reads k).
k = len(raw_rows)

# One row per spectrum, columns labelled by wavenumber. This also yields a
# valid (empty) frame when no spectra were found.
RawSpectra = pd.DataFrame(raw_rows, columns = WaveNumbers)
Spectra = pd.DataFrame(clean_rows, columns = WaveNumbers)

ok


## Adding analyte list and concentration list to spectra dataframes   

In [44]:
# Turn the label lists into Series so they can be inserted as columns.
se1, se2 = pd.Series(AnalyteList), pd.Series(ConcentrationList)

# Put the two label columns at the front of both dataframes:
# 'Analyte' first, 'Concentration' second.
for frame in (RawSpectra, Spectra):
    frame.insert(0, 'Analyte', se1)
    frame.insert(1, 'Concentration', se2)

## Export the raw and cleaned up Spectra dataframes

In [None]:
# Write both dataframes to CSV files in the current working directory,
# cleaned spectra first and raw spectra second.
for frame, target in ((Spectra, 'Spectra.csv'), (RawSpectra, 'RawSpectra.csv')):
    frame.to_csv(target)