### Imports

In [1]:
import os
import sys

# Directory put at the front of the module search path; presumably the local
# magnetstein checkout that provides `masserstein` (confirm).
# The MAGNETSTEIN_PATH environment variable overrides the default location so the
# notebook can run on other machines without editing this cell.
mypath = os.environ.get("MAGNETSTEIN_PATH", "/home/basia/Documents/spectroscopy/magnetstein")
# Keep the path first in sys.path, but do not stack duplicates when the cell is re-run.
if sys.path[:1] != [mypath]:
    sys.path.insert(0, mypath)

In [2]:
import numpy as np
import pandas as pd
import os
from masserstein import NMRSpectrum, estimate_proportions
import matplotlib.pyplot as plt
import pulp
import pickle

### Data

In [3]:
# Key used to select this experiment's entry in every per-experiment lookup table below.
experiment_name = 'PMG 287 monitoring'
# Becomes part of the results directory path; the value 'unknown_products' additionally
# makes a later cell drop the trailing reagent spectra.
products = 'known_products'

In [4]:
# CSV file holding the mixture measurements for each experiment.
mixture_paths = {'Saccharose hydrolysis': '../data/saccharose_1000/all.csv',
                'Silane': '../data/silan/pmg166_3rdseries_full.csv',
                'PMG 247 monitoring': '../data/PMG_247/preprocessed_PMG247_monitoring.csv',
                'PMG 271 monitoring': '../data/PMG_271/PMG271_monitoring_druga_seria.csv',
                'PMG 284 monitoring': '../data/PMG_284/seria1.csv',
                'PMG 287 monitoring': '../data/PMG_287/preprocessed_mixture.csv'
                }

# CSV files of the individual reagent spectra for each experiment.
# The order matters: with products == 'unknown_products' a later cell drops the
# trailing entries (two for 'Saccharose hydrolysis', one otherwise).
# NOTE(review): 'product_path' (PMG 247 / PMG 271) looks like an unfilled placeholder,
# not a real file; reading it would presumably fail - confirm before selecting those experiments.
reagents_paths = {'Saccharose hydrolysis': ['../data/saccharose_1000/sucrose.csv', 
                                            '../data/saccharose_1000/glucose.csv',
                                            '../data/saccharose_1000/fructose.csv'],
                'Silane': [],
                'PMG 247 monitoring': ['../data/PMG_247/preprocessed_substrate0.csv',
                                      '../data/PMG_247/preprocessed_substrate1.csv',
                                      'product_path'],
                'PMG 271 monitoring': ['../data/PMG_271/preprocessed_substrate0.csv',
                                      '../data/PMG_271/preprocessed_substrate1.csv',
                                      'product_path'],
                'PMG 284 monitoring': ['../data/PMG_284/preprocessed_substrate0.csv',
                                      '../data/PMG_284/preprocessed_substrate1.csv',
                                      '../data/PMG_284/preprocessed_product.csv'],
                'PMG 287 monitoring': ['../data/PMG_287/preprocessed_substrate0.csv',
                                      '../data/PMG_287/preprocessed_substrate1.csv',
                                      '../data/PMG_287/preprocessed_product.csv']}

# Column separators passed to pd.read_csv for each experiment's files.
# Everything is tab-separated except the PMG 247 mixture file, which uses commas.
_experiment_names = ('Saccharose hydrolysis',
                     'Silane',
                     'PMG 247 monitoring',
                     'PMG 271 monitoring',
                     'PMG 284 monitoring',
                     'PMG 287 monitoring')

mixture_separators = dict.fromkeys(_experiment_names, '\t')
mixture_separators['PMG 247 monitoring'] = ','

reagents_separators = dict.fromkeys(_experiment_names, '\t')

# Results directory for each experiment: ../results/<folder>/comparing_with_first/<products>/
_results_folders = {'Saccharose hydrolysis': 'saccharose_1000',
                    'Silane': 'silan',
                    'PMG 247 monitoring': 'PMG_247',
                    'PMG 271 monitoring': 'PMG_271',
                    'PMG 284 monitoring': 'PMG_284',
                    'PMG 287 monitoring': 'PMG_287'}

results_paths = {name: f'../results/{folder}/comparing_with_first/{products}/'
                 for name, folder in _results_folders.items()}

#### Mixture in time

In [5]:
# Keep only every n-th measured spectrum for these experiments.
# Experiments not listed here are used exactly as read from the CSV.
subsampling_steps = {'Saccharose hydrolysis': 10,
                     'PMG 271 monitoring': 20,
                     'PMG 284 monitoring': 10,
                     'PMG 287 monitoring': 10}


def subsample_time_series(frame, step):
    """Return the first column of `frame` followed by every `step`-th spectrum column.

    The first column is kept as the ppm axis. The last column is discarded before
    subsampling (NOTE(review): presumably an artefact column - confirm), and of the
    remaining columns only every `step`-th one, starting with the first, is kept.
    """
    ppm_axis = frame.iloc[:, 0:1]
    spectra = frame.iloc[:, 1:-1]
    return pd.concat((ppm_axis, spectra.iloc[:, ::step]), axis=1)


mixture_time_data = pd.read_csv(mixture_paths[experiment_name], sep = mixture_separators[experiment_name])
if experiment_name in subsampling_steps:
    # `ppm` is kept as a notebook-level name, as the original cell defined it.
    ppm = mixture_time_data.iloc[:, 0:1]
    mixture_time_data = subsample_time_series(mixture_time_data, subsampling_steps[experiment_name])

In [6]:
# Column labels: 'ppm' for the axis, then 't1', 't2', ... for the remaining columns.
n_columns = mixture_time_data.shape[1]
names = ['ppm', *(f't{nb}' for nb in range(1, n_columns))]

In [7]:
# Relabel the columns as 'ppm', 't1', 't2', ... so load_spectrum can address a timepoint by number.
mixture_time_data.columns = names

In [8]:
def load_spectrum(mixture_time_data, moment_of_time):
    """Build the mixture spectrum recorded at one timepoint.

    Parameters
    ----------
    mixture_time_data : DataFrame with a 'ppm' column and intensity columns
        named 't1', 't2', ...
    moment_of_time : number of the timepoint; selects the column 't<moment_of_time>'.

    Returns
    -------
    NMRSpectrum built from the (ppm, intensity) pairs, after
    trim_negative_intensities() and normalize() have been called on it.
    """
    column = 't' + str(moment_of_time)
    peaks = list(zip(mixture_time_data['ppm'], mixture_time_data[column]))
    spectrum = NMRSpectrum(confs = peaks)
    spectrum.trim_negative_intensities()
    spectrum.normalize()
    return spectrum

#### Reagents

In [9]:
# Read each reagent file (no header row) and keep only its first two columns.
reagent_sep = reagents_separators[experiment_name]
reagents_spectra = [
    pd.read_csv(reagent_file, sep=reagent_sep, header=None).iloc[:, :2]
    for reagent_file in reagents_paths[experiment_name]
]

In [10]:
# Turn each two-column frame (ppm, intensity) into an NMRSpectrum.
# The frames in reagents_spectra are replaced by the spectra, so this cell must run only once.
reagents_spectra = [
    NMRSpectrum(confs = list(zip(frame.iloc[:, 0], frame.iloc[:, 1])))
    for frame in reagents_spectra
]

In [11]:
# Apply the same preprocessing to every reagent that load_spectrum applies to the mixture.
for reagent_spectrum in reagents_spectra:
    reagent_spectrum.trim_negative_intensities()
    reagent_spectrum.normalize()

In [12]:
# With unknown products, drop the trailing spectra from the reagent list:
# the last two for saccharose hydrolysis, the last one for every other experiment.
if products == 'unknown_products':
    n_to_drop = 2 if experiment_name == 'Saccharose hydrolysis' else 1
    reagents_spectra = reagents_spectra[:-n_to_drop]

In [13]:
%matplotlib notebook

In [14]:
reagents_spectra

[<masserstein.nmr_spectrum.NMRSpectrum at 0x7598273aeb60>,
 <masserstein.nmr_spectrum.NMRSpectrum at 0x75982b580ca0>,
 <masserstein.nmr_spectrum.NMRSpectrum at 0x7598273adf60>]

In [15]:
# Overlay the reagent spectra with the first mixture spectrum on an inverted x axis.
first_mixture = load_spectrum(mixture_time_data, 1)
spectra_to_show = reagents_spectra + [first_mixture]
plt.gca().invert_xaxis()
NMRSpectrum.plot_all(spectra_to_show, profile=True)

<IPython.core.display.Javascript object>

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


In [23]:
help(NMRSpectrum.plot_all)

Help on function plot_all in module masserstein.spectrum:

plot_all(spectra, show=True, profile=False, cmap=None, **plot_kwargs)
    Shows the supplied list of spectra on a single plot.



In [26]:
reagents_spectra[0]

<masserstein.nmr_spectrum.NMRSpectrum at 0x72e1cfdb6890>

In [None]:
# NOTE(review): bare string literal with no effect; looks like a pasted working-directory
# path. Confirm this cell can be removed.
'/home/basia/Documents/reactions_project/notebooks'

In [22]:
# Plot a single component spectrum and save it under ./components/.
#
# The axis used to be inverted *before* plt.figure() created the figure that is drawn
# and saved, so the inversion landed on a throwaway figure. The figure is now created
# first and its own x axis is inverted after plotting.

# (ppm, intensity) arrays of the three reagent spectra.
sp = reagents_spectra[0]
confs = np.array(sp.confs)
ppm, ints = confs[:, 0], confs[:, 1]

sp2 = reagents_spectra[1]
confs2 = np.array(sp2.confs)
ppm2, ints2 = confs2[:, 0], confs2[:, 1]

sp3 = reagents_spectra[2]
confs3 = np.array(sp3.confs)
ppm3, ints3 = confs3[:, 0], confs3[:, 1]

# ppm windows of the third spectrum, used by the optional highlighted-region plots below.
# 284 product
# area1 = np.logical_and(ppm3 > 0.622, ppm3 < 0.672)
# area2 = np.logical_and(ppm3 > 0.84, ppm3 < 0.92)
# area3 = np.logical_and(ppm3 > 0.95, ppm3 < 1.01)
# area4 = np.logical_and(ppm3 > 1.042, ppm3 < 1.09)
# area5 = np.logical_and(ppm3 > 1.22, ppm3 < 1.38)
# area6 = np.logical_and(ppm3 > 1.4, ppm3 < 1.49)
# area7 = np.logical_and(ppm3 > 1.5, ppm3 < 1.675)

# 287 product
area1 = np.logical_and(ppm3 > 0.37, ppm3 < 0.458)
area2 = np.logical_and(ppm3 > 0.801, ppm3 < 0.84)
area3 = np.logical_and(ppm3 > 0.85, ppm3 < 0.892)
area4 = np.logical_and(ppm3 > 1.187, ppm3 < 1.3)

fig, ax = plt.subplots(figsize=(15, 7))
# ax.set_xlim(0.55, 1.9)
# ax.plot(ppm, ints, label='1-hexene', color='blue') #2-pentene, saccharose
ax.plot(ppm2, ints2, label='triethylsilane', color='darkorange') #glucose
# ax.plot(ppm3, ints3, label='product', color='blue') #fructose

# Optional: highlight the product windows (areas 1-7 for 284, areas 1-4 for 287).
# ax.plot(ppm3[area1], ints3[area1], label='product', color='green')
# ax.plot(ppm3[area2], ints3[area2], color='green')
# ax.plot(ppm3[area3], ints3[area3], color='green')
# ax.plot(ppm3[area4], ints3[area4], color='green')
# ax.plot(ppm3[area5], ints3[area5], color='green')
# ax.plot(ppm3[area6], ints3[area6], color='green')
# ax.plot(ppm3[area7], ints3[area7], color='green')

# Invert after plotting so the inversion applies to the final data limits of this figure.
ax.invert_xaxis()
ax.legend()

# Create the output directory if needed so savefig does not fail on a fresh checkout.
components_dir = os.path.join(os.getcwd(), 'components')
os.makedirs(components_dir, exist_ok=True)
fig.savefig(os.path.join(components_dir, experiment_name+'_triethylsilane.png'), dpi=300)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Estimation

In [15]:
# Passed to estimate_proportions as MTD (kappa) and MTD_th (kappa_th);
# both values are also embedded in the names of the result files.
kappa = 0.5
kappa_th = 1

In [16]:
# Per-timepoint results, in timepoint order.
proportions_in_times, noise_proportions_in_times = [], []
noise, noise_in_components = [], []

# Fields echoed to the cell output after every timepoint: (heading, key in the result).
printed_fields = (('Proportions:\n', 'proportions'),
                  ('Proportion_of_noise_in_components:\n', 'proportion_of_noise_in_components'))

n_columns = mixture_time_data.shape[1]
for timepoint in range(1, n_columns):
    print(f'This is timepoint {timepoint}.\n')
    mix = load_spectrum(mixture_time_data, timepoint)
    # load_spectrum has already done both of these; they are repeated here as in the original cell.
    mix.trim_negative_intensities()
    mix.normalize()
    result = estimate_proportions(mix, reagents_spectra,
                                  what_to_compare='area',
                                  solver=pulp.GUROBI(msg=False),
                                  MTD=kappa,
                                  MTD_th=kappa_th)
    proportions_in_times.append(result['proportions'])
    noise_proportions_in_times.append(result['proportion_of_noise_in_components'])
    noise.append(result['noise'])
    noise_in_components.append(result['noise_in_components'])

    # Each timepoint must report the same horizontal axis as the one before it.
    current_axis = result['common_horizontal_axis']
    if timepoint > 1:
        assert current_axis == common_horizontal_axis
    common_horizontal_axis = current_axis

    for heading, key in printed_fields:
        print(heading)
        print(result[key])
        print('\n')

This is timepoint 1.

Set parameter Username
Academic license - for non-commercial use only - expires 2024-11-13




Proportions:

[0.9459094102300002]


Proportion_of_noise_in_components:

0.0


This is timepoint 2.

Proportions:

[0.9428276263999905]


Proportion_of_noise_in_components:

0.0


This is timepoint 3.

Proportions:

[0.9393606171050028]


Proportion_of_noise_in_components:

0.0


This is timepoint 4.

Proportions:

[0.9427338700810018]


Proportion_of_noise_in_components:

0.0


This is timepoint 5.

Proportions:

[0.9405770078769984]


Proportion_of_noise_in_components:

0.0


This is timepoint 6.

Proportions:

[0.9555674652069932]


Proportion_of_noise_in_components:

0.0


This is timepoint 7.

Proportions:

[0.9489833551320015]


Proportion_of_noise_in_components:

0.0


This is timepoint 8.

Proportions:

[0.9434607336310048]


Proportion_of_noise_in_components:

0.0


This is timepoint 9.

Proportions:

[0.9470721590419955]


Proportion_of_noise_in_components:

0.0


This is timepoint 10.

Proportions:

[0.9477944163719995]


Proportion_of_noise_in_components:

0.0


This is tim

Proportions:

[0.9086128477840018]


Proportion_of_noise_in_components:

0.0004737924510000009


This is timepoint 78.

Proportions:

[0.9178245290399967]


Proportion_of_noise_in_components:

0.00040489170299999854


This is timepoint 79.

Proportions:

[0.9059887271849969]


Proportion_of_noise_in_components:

0.0012372901769999957


This is timepoint 80.

Proportions:

[0.9055701899030052]


Proportion_of_noise_in_components:

0.0030839714440000178


This is timepoint 81.

Proportions:

[0.9136366855119981]


Proportion_of_noise_in_components:

0.0004340438679999991


This is timepoint 82.

Proportions:

[0.9053577464190018]


Proportion_of_noise_in_components:

0.004251014188000009


This is timepoint 83.

Proportions:

[0.9127862168710017]


Proportion_of_noise_in_components:

0.015205052656000027


This is timepoint 84.

Proportions:

[0.9135710820879998]


Proportion_of_noise_in_components:

0.013074217275999996


This is timepoint 85.

Proportions:

[0.9036518651650014]


Propo

In [17]:
len(proportions_in_times)

103

In [18]:
# Estimated proportion of the first component at each timepoint (index on the x axis).
first_component = [prop[0] for prop in proportions_in_times]
plt.plot(range(len(first_component)), first_component, 'p')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7b8dca2deb90>]

In [17]:
# Save the estimated proportions, tagged with the kappa values used.
# The results directory is created first so that open() does not fail when it is missing.
os.makedirs(results_paths[experiment_name], exist_ok=True)
with open(f'{results_paths[experiment_name]}proportions_in_times_{kappa}_{kappa_th}.pkl', 'wb') as f:
    pickle.dump(proportions_in_times, f)

In [18]:
# Save the per-timepoint proportion of noise in components, tagged with the kappa values used.
noise_proportions_file = f'{results_paths[experiment_name]}noise_proportions_in_times_{kappa}_{kappa_th}.pkl'
with open(noise_proportions_file, 'wb') as out_file:
    pickle.dump(noise_proportions_in_times, out_file)

In [19]:
# Save the horizontal axis reported by the estimation, tagged with the kappa values used.
axis_file = f'{results_paths[experiment_name]}common_horizontal_axis_{kappa}_{kappa_th}.pkl'
with open(axis_file, 'wb') as out_file:
    pickle.dump(common_horizontal_axis, out_file)

In [20]:
# Save the per-timepoint 'noise' results, tagged with the kappa values used.
noise_file = f'{results_paths[experiment_name]}noise_{kappa}_{kappa_th}.pkl'
with open(noise_file, 'wb') as out_file:
    pickle.dump(noise, out_file)

In [21]:
# Save the per-timepoint 'noise_in_components' results, tagged with the kappa values used.
noise_in_components_file = f'{results_paths[experiment_name]}noise_in_components_{kappa}_{kappa_th}.pkl'
with open(noise_in_components_file, 'wb') as out_file:
    pickle.dump(noise_in_components, out_file)

### Linear regression

In [34]:
# Reload the saved proportions (written by this notebook) as an array with one row per timepoint.
proportions_file = f'{results_paths[experiment_name]}proportions_in_times_{kappa}_{kappa_th}.pkl'
with open(proportions_file, 'rb') as in_file:
    y = np.array(pickle.load(in_file))

In [35]:
# Design matrix for a straight-line fit: timepoint numbers 1..n next to a column of ones.
n_timepoints = y.shape[0]
t = np.arange(1, n_timepoints + 1)
t_and_ones = np.column_stack((t, np.ones(n_timepoints)))

In [36]:
# Zero-based column index of the component whose proportions are fitted and plotted below.
reagent_number = 1

In [37]:
# Least-squares fit of proportion = slope * t + intercept for every component at once;
# the column of the chosen component is then selected.
# rcond=None is passed explicitly: omitting it makes NumPy emit a FutureWarning about
# the changed default cutoff.
coefficients = np.linalg.lstsq(t_and_ones, y, rcond=None)[0]
slope, intercept = coefficients[:, reagent_number]

  slope, intercept = np.linalg.lstsq(t_and_ones, y)[0][:,reagent_number]


In [38]:
# Estimated proportions of the selected component (markers) with the fitted line on top.
selected_proportions = y[:, reagent_number]
fitted_line = slope*t + intercept
plt.plot(range(len(y)), selected_proportions, 'p')
plt.plot(fitted_line)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7dbcb3bf8c70>]