In [1]:
import os
import pywt
import math
import numpy as np
import scipy.signal as sig
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from matplotlib import cm
from scipy.stats import norm
import numpy.polynomial.polynomial as poly

In [2]:
def import_file(path, limit_low=None, limit_high=None):
    """Load a two-column, comma-separated spectrum and optionally crop it.

    Parameters
    ----------
    path : str or path-like
        File read with ``np.genfromtxt(path, delimiter=",")``. The first
        column is returned as wavenumbers, the second as intensities.
    limit_low, limit_high : float, optional
        Wavenumber values marking the start (inclusive) and the end
        (exclusive) of the returned slice. Each limit is mapped to the index
        of the closest wavenumber in the file, so an exact match behaves as
        before and a value that is not present in the file no longer raises
        ``ValueError``. ``None`` keeps the corresponding end of the spectrum.

    Returns
    -------
    wavenumbers, intensities : numpy.ndarray
        The two columns, sliced with the same index range.

    Raises
    ------
    ValueError
        If a limit is given but the wavenumber column has no non-NaN entry.
    """
    spectrum = np.genfromtxt(path, delimiter=",").T
    wavenumbers, intensities = spectrum[0], spectrum[1]

    def nearest_index(limit):
        # nanargmin ignores NaN entries and returns the first index of the
        # smallest distance, i.e. the first exact match when one exists.
        return int(np.nanargmin(np.abs(wavenumbers - limit)))

    start = 0 if limit_low is None else nearest_index(limit_low)
    stop = len(wavenumbers) if limit_high is None else nearest_index(limit_high)

    return wavenumbers[start:stop], intensities[start:stop]

def import_directory(path, limit_low=None, limit_high=None):
    """Placeholder for loading every spectrum found in a directory.

    Not implemented yet: all arguments are accepted but ignored, and the
    function returns ``None``.
    """
    # TODO: list the files under `path` (os.listdir) and read each of them
    # with np.genfromtxt(..., delimiter=","), as sketched in the first draft.
    return None

In [3]:
# Load a single spectrum, cropped to the wavenumber limits passed to import_file.
wavenumbers, intensities = import_file("spectra/E (1).TXT", limit_low=300, limit_high=2000)

In [4]:
# Smooth the raw intensities with a Savitzky-Golay filter (3-point window,
# first-order polynomial); deriv=0 returns the smoothed signal itself.
intensities_sg = sig.savgol_filter(intensities, 
                                   window_length=3, 
                                   polyorder=1, 
                                   deriv=0)

In [5]:
def plot_raw_spectra(wavenumbers, intensities):
    """Plot a spectrum as a thin red line on a new figure.

    Parameters
    ----------
    wavenumbers : sequence
        X values; the first and last entries set the x-axis limits.
    intensities : sequence
        Y values plotted against ``wavenumbers``.

    The x-axis is inverted after the limits are set and a grid is drawn.
    Nothing is returned.
    """
    fig, ax = plt.subplots()
    # Plot the argument that was passed in; the previous version ignored it
    # and always drew the notebook-global `intensities_sg`.
    ax.plot(wavenumbers, intensities, linewidth=1, color="red")
    ax.set_xlim(wavenumbers[0], wavenumbers[-1])
    ax.invert_xaxis()
    ax.grid()

In [6]:
%matplotlib
# Show the smoothed spectrum.
plot_raw_spectra(wavenumbers, intensities_sg)

Using matplotlib backend: Qt5Agg


In [7]:
# Extend the smoothed spectrum on both sides by repeating its edge values so
# that the wavelet transform is not computed right at the ends of the data.
pad_width = 400
intensities_ext = np.pad(intensities_sg, pad_width, mode="edge")

# Scales 1, 2, ..., 200.
scales = np.linspace(1, 200, 200)

coefs, freqs = pywt.cwt(data=intensities_ext, scales=scales, wavelet="mexh")

# Drop the columns that belong to the padding again.
coefs = coefs[:, pad_width:-pad_width]

In [8]:
# Coordinate grids (x: wavenumber, y: scale) for the contour plot of the coefficients.
wavenumbers_grid, scales_grid = np.meshgrid(wavenumbers, scales)

In [9]:
%matplotlib
# Top panel: smoothed spectrum. Bottom panel: filled contours of the wavelet
# coefficients over (wavenumber, scale).
fig, (ax1, ax2) = plt.subplots(2, 1)

sp = ax1.plot(wavenumbers, intensities_sg, linewidth=1, color="red")
ax1.set_xlim(wavenumbers[0], wavenumbers[-1])
ax1.invert_xaxis()
ax1.grid()

# Symmetric colour range around zero, based on the largest |coefficient|.
coef_limit = np.max(np.abs(coefs))
cmap = cm.seismic
normal = colors.Normalize(vmin=-coef_limit, vmax=coef_limit)

cp = ax2.contourf(
    wavenumbers_grid,
    scales_grid,
    coefs,
    cmap=cmap,
    norm=normal,
    levels=200,
    extend="both",
)
cbar = fig.colorbar(cp, orientation="horizontal")
ax2.invert_xaxis()

Using matplotlib backend: Qt5Agg


In [11]:
def shannon_entropy(data):
    """Row-wise Shannon entropy of the normalised magnitudes of ``data``.

    Each row of the 2-D input is converted to a distribution
    ``p = |row| / sum(|row|)`` and ``-sum(p * log(p))`` is returned per row.

    Entries that are exactly zero contribute nothing (the usual
    ``0 * log(0) = 0`` convention) instead of turning the whole row into NaN.
    A row whose magnitudes sum to zero has no defined distribution and still
    yields NaN.

    Parameters
    ----------
    data : array_like, 2-D
        Values whose magnitudes are normalised along axis 1.

    Returns
    -------
    numpy.ndarray
        One entropy value per row.
    """
    magnitudes = np.abs(data)
    row_totals = magnitudes.sum(axis=1, keepdims=True)

    # The warnings silenced here come from log(0) and 0/0, both of which are
    # handled explicitly below.
    with np.errstate(divide="ignore", invalid="ignore"):
        p_data = magnitudes / row_totals
        terms = np.where(p_data == 0, 0.0, p_data * np.log(p_data))

    return -np.sum(terms, axis=1)

In [12]:
def determine_optim_scale(coefs, scales):
    """Pick the scale at the first local minimum of the row-wise entropy.

    The entropy of every row of ``coefs`` is computed with
    ``shannon_entropy`` and plotted against ``scales`` on the current pyplot
    axes (side effect). The scale belonging to the first strict local minimum
    is returned. If the curve has no interior local minimum (for example when
    it is monotonic or holds fewer than three scales), the scale with the
    smallest non-NaN entropy is returned instead of failing with
    ``IndexError``.

    Parameters
    ----------
    coefs : array_like, 2-D
        One row of coefficients per scale.
    scales : sequence
        Scale belonging to each row of ``coefs``.

    Returns
    -------
    The entry of ``scales`` selected as described above.

    Raises
    ------
    ValueError
        If every entropy value is NaN.
    """
    shannon_H = shannon_entropy(coefs)

    plt.plot(scales, shannon_H)

    local_minima = sig.argrelextrema(shannon_H, np.less)[0]
    if local_minima.size > 0:
        return scales[local_minima[0]]

    # No interior minimum: fall back to the global minimum of the curve.
    return scales[np.nanargmin(shannon_H)]

# Scale selected from the entropy curve of the multi-scale coefficients.
scale = determine_optim_scale(coefs, scales)

In [13]:
# Repeat the transform at the selected scale only, using the same edge
# padding as for the multi-scale transform.
pad_width = 400
intensities_ext = np.pad(intensities_sg, pad_width, mode="edge")

coefs, freqs = pywt.cwt(data=intensities_ext, scales=scale, wavelet="mexh")

# Keep the single row of coefficients, without the padded columns.
# Note that `coefs` is 1-D from here on.
coefs = coefs[0, pad_width:-pad_width]

In [14]:
%matplotlib
# Overlay the smoothed spectrum (red) and the coefficients at the selected
# scale (blue) on a shared, inverted wavenumber axis.
fig, ax = plt.subplots()

for series, line_color in ((intensities_sg, "red"), (coefs, "blue")):
    ax.plot(wavenumbers, series, linewidth=0.5, color=line_color)

ax.set_xlim(wavenumbers[0], wavenumbers[-1])
ax.invert_xaxis()
ax.grid()

Using matplotlib backend: Qt5Agg


In [15]:
def iterative_gaussian_fit(data, n_bins=200, diff=0.05, max_iter=100):
    """Iteratively fit a zero-mean normal distribution to the bulk of ``data``.

    Each pass histograms the data, keeps the values lying strictly between
    the outer edges of the outermost bins whose count exceeds one third of
    the tallest bin, and fits a normal distribution with the mean fixed at
    zero to them. The bin count is then set to ``ceil(8 * range / sigma)``
    and the pass is repeated until the bin count changes by at most ``diff``
    (relative) or the direction of change flips.

    Parameters
    ----------
    data : array_like
        Sample values; lists are accepted and converted to an array.
    n_bins : int, optional
        Bin count of the first histogram.
    diff : float, optional
        Relative change of the bin count at or below which the iteration stops.
    max_iter : int, optional
        Upper bound on the number of passes. If it is reached, a
        ``RuntimeWarning`` is issued and the last sigma is returned.

    Returns
    -------
    float
        Scale (standard deviation) of the last fit.

    Raises
    ------
    ValueError
        If ``data`` is empty, ``max_iter`` is smaller than 1, or a fit does
        not yield a finite, positive sigma.
    """
    data = np.asarray(data)
    if data.size == 0:
        raise ValueError("data must not be empty")
    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")

    data_range = np.max(data) - np.min(data)
    n_change_prev = 0
    sigma = None

    for _ in range(max_iter):
        # Calculate histogram
        hist, bins = np.histogram(data, n_bins)

        # Find bins with a count of more than 1/3 of the tallest bin
        dense_bins = np.flatnonzero(hist > hist.max() / 3)
        max_data_low = bins[dense_bins[0]]
        max_data_high = bins[dense_bins[-1] + 1]

        # Reduce data to the selected bins and fit the spread around zero
        data_reduced = data[(data > max_data_low) & (data < max_data_high)]
        _, sigma = norm.fit(data_reduced, floc=0)
        if not (np.isfinite(sigma) and sigma > 0):
            raise ValueError(
                "Gaussian fit did not yield a finite, positive sigma "
                "(got {!r})".format(sigma)
            )

        # Calculate the new number of bins and compare with the previous one.
        # At least one bin is kept so the next histogram stays valid.
        n_bins_prev = n_bins
        n_bins = max(1, math.ceil(8 * data_range / sigma))
        n_change = (n_bins / n_bins_prev) - 1
        print("Bins:", n_bins, "\tChange:", round(n_change*100, 1), "%")

        # Stop on convergence, or when the bin count starts to oscillate.
        if abs(n_change) <= diff or n_change * n_change_prev < 0:
            return sigma
        n_change_prev = n_change

    import warnings

    warnings.warn(
        "iterative_gaussian_fit did not converge within max_iter passes; "
        "returning the last sigma",
        RuntimeWarning,
    )
    return sigma
        

In [16]:
# Spread of the bulk of the single-scale coefficients (zero-mean Gaussian fit).
sigma = iterative_gaussian_fit(coefs)

Bins: 738 	Change: 269.0 %
Bins: 1625 	Change: 120.2 %
Bins: 1755 	Change: 8.0 %
Bins: 1746 	Change: -0.5 %


In [16]:
# Histogram of the coefficients with the fitted zero-mean Gaussian overlaid.
fig, ax = plt.subplots()
# NOTE(review): bins=11466 is hard-coded, while the fit above reported 1746
# bins in its recorded output -- confirm whether the two should agree.
# ax.hist returns (values, bin edges, patches); the patches are bound to `hist`.
density, bins, hist = ax.hist(coefs, bins=11466, density=True)
x = np.linspace(-sigma*10, sigma*10, 100)
gauss = norm.pdf(x,0, sigma)
# Rescale the curve so that its peak matches the tallest histogram bar.
gauss = gauss / np.max(gauss) * np.max(density)
ax.plot(x, gauss)

[<matplotlib.lines.Line2D at 0x189e9215610>]

In [17]:
# Split the coefficients at |c| = 3*sigma and count both groups.
inner_mask = np.abs(coefs) < 3 * sigma
coefs_inner = coefs[inner_mask]
n_coefs_inner = int(np.count_nonzero(inner_mask))
n_coefs_outer = int(np.count_nonzero(~inner_mask))

In [18]:
# Threshold on |coefficient|: sigma times (0.6 + 10 * outer/inner count ratio).
# NOTE(review): raises ZeroDivisionError when n_coefs_inner is 0.
theta = sigma * (0.6 + 10 * (n_coefs_outer/n_coefs_inner))
# Shown as a multiple of sigma.
theta/sigma

18.108090614886734

In [19]:
# Mark the centre of every 9-point window in which all coefficients stay
# below the threshold `theta` in magnitude.
window = 9
half_window = window // 2
below_theta = np.abs(coefs) < theta

baseline_indices = []

# A window starting at i covers i .. i + window - 1, so its centre is
# i + half_window and the last valid start index is len(coefs) - window.
# (The previous loop stored i + 5 and skipped the last window.)
for i in range(len(coefs) - window + 1):
    if np.all(below_theta[i:i + window]):
        baseline_indices.append(i + half_window)


In [20]:
# Wavenumbers and smoothed intensities at the selected baseline indices.
baseline_wns = wavenumbers[baseline_indices]
baseline_intensities = intensities_sg[baseline_indices]

In [23]:
# Raw spectrum, selected baseline points (red dots), coefficients and the
# +/- theta threshold lines on one inverted wavenumber axis.
x_first, x_last = wavenumbers[0], wavenumbers[-1]

fig, ax = plt.subplots()

ax.plot(wavenumbers, intensities)
ax.plot(
    baseline_wns,
    baseline_intensities,
    color="red",
    marker="o",
    markersize=1,
    linewidth=0,
)
ax.plot(wavenumbers, coefs)
ax.hlines([-theta, 0, theta], x_first, x_last)
ax.set_xlim(x_first, x_last)
ax.invert_xaxis()
ax.grid()

In [22]:
# Fifth-degree polynomial through the baseline points. With full=True,
# polyfit also returns diagnostics whose first entry holds the sum of
# squared residuals.
fit_params, fit_diagnostics = poly.polyfit(
    baseline_wns, baseline_intensities, deg=5, full=True
)
resid = fit_diagnostics[0][0]

# Root of the mean squared residual over the baseline points.
res_std_err = math.sqrt(resid / len(baseline_wns))

# Fitted baseline evaluated at the baseline points themselves.
baseline_intensities_fit = poly.polyval(baseline_wns, fit_params)

In [24]:
# Evaluate the fitted polynomial over the whole spectrum and compare it with
# the smoothed data and the baseline points.
intensities_fit = poly.polyval(wavenumbers, fit_params)

fig, ax = plt.subplots()
for curve, curve_color in ((intensities_fit, "red"), (intensities_sg, "blue")):
    ax.plot(wavenumbers, curve, linewidth=1, color=curve_color)
ax.plot(
    baseline_wns,
    baseline_intensities,
    color="red",
    marker="o",
    markersize=1,
    linewidth=0,
)
ax.set_xlim(wavenumbers[0], wavenumbers[-1])
ax.invert_xaxis()
ax.grid()

In [60]:
# Drop baseline points that deviate from the current polynomial fit by more
# than 1.5 times the residual standard error, then refit.
# A boolean mask replaces the per-point `i not in outliers` list lookup,
# which was quadratic in the number of baseline points.
is_outlier = (
    np.abs(np.asarray(baseline_intensities) - baseline_intensities_fit)
    > 1.5 * res_std_err
)
outliers = np.flatnonzero(is_outlier).tolist()

# Keep both results as Python lists: the cell that adds midpoints of
# below-baseline runs relies on list.append.
baseline_wns = list(np.asarray(baseline_wns)[~is_outlier])
baseline_intensities = list(np.asarray(baseline_intensities)[~is_outlier])

# NOTE: `resid` is rebound to polyfit's diagnostics list here, and
# `baseline_intensities_fit` still refers to the fit made before the removal.
fit_params, resid = poly.polyfit(baseline_wns, baseline_intensities, deg=5, full=True)

In [61]:
# Re-evaluate the polynomial after the outlier removal and plot it against
# the smoothed spectrum and the remaining baseline points.
intensities_fit = poly.polyval(wavenumbers, fit_params)

fig, ax = plt.subplots()
line_style = dict(linewidth=1)
ax.plot(wavenumbers, intensities_fit, color="red", **line_style)
ax.plot(wavenumbers, intensities_sg, color="blue", **line_style)
ax.plot(
    baseline_wns,
    baseline_intensities,
    color="red",
    marker="o",
    markersize=1,
    linewidth=0,
)
ax.set_xlim(wavenumbers[0], wavenumbers[-1])
ax.invert_xaxis()
ax.grid()

In [62]:
# Wherever the smoothed spectrum stays below the fitted baseline for at least
# k consecutive points, add the middle point of that run to the baseline set.
# Requires `baseline_wns` / `baseline_intensities` to be lists (append).
negatives = 0
k = math.ceil(len(wavenumbers)/100)  # minimum run length: 1 % of the points
n_points = len(wavenumbers)

# The extra iteration (i == n_points) acts as a sentinel that closes a run
# reaching the end of the spectrum; such a run used to be dropped silently.
for i in range(n_points + 1):
    if i < n_points and intensities_sg[i] < intensities_fit[i]:
        negatives += 1
        continue

    if negatives >= k:
        # The run covers i - negatives .. i - 1. Its middle index is
        # i - ceil(negatives / 2); the former `i - negatives//2` was one
        # position too far right for runs of odd length.
        j = i - (negatives + 1)//2
        baseline_wns.append(wavenumbers[j])
        baseline_intensities.append(intensities_sg[j])
    negatives = 0

Signature: np.all(a, axis=None, out=None, keepdims=<no value>)
Docstring:
Test whether all array elements along a given axis evaluate to True.

Parameters
----------
a : array_like
    Input array or object that can be converted to an array.
axis : None or int or tuple of ints, optional
    Axis or axes along which a logical AND reduction is performed.
    The default (``axis=None``) is to perform a logical AND over all
    the dimensions of the input array. `axis` may be negative, in
    which case it counts from the last to the first axis.

    .. versionadded:: 1.7.0

    If this is a tuple of ints, a reduction is performed on multiple
    axes, instead of a single axis or all the axes as before.
out : ndarray, optional
    Alternat