### Imports

In [None]:
import sys
import numpy as np
import pandas as pd
import os
from masserstein import NMRSpectrum, estimate_proportions
import matplotlib.pyplot as plt
import pulp
import pickle

### Data

In [None]:
# Human-readable label of the experiment analysed in this notebook.
experiment_name = 'Saccharose hydrolysis'

In [None]:
# Directory that holds the raw input file and receives the preprocessed outputs.
data_path = '../../data/saccharose_hydrolysis/'

# Time-resolved mixture spectra. Built from data_path so the directory is
# spelled out in one place only.
mixture_path = data_path + 'sucrose_better_baseline_constant_region_sizes.csv'

# Reagent names, in the same order as the reference regions extracted below.
reagents_names = ['saccharose', 'alpha-glucose', 'beta-glucose', 'fructose']

# Field separator of the mixture file (tab, despite the .csv extension).
mixture_separator = '\t'

#### Mixture in time

In [None]:
# Read the mixture table and drop its last column.
# NOTE(review): the dropped column is presumably an empty one produced by a
# trailing separator in the file — confirm against the raw data.
mixture_time_data = pd.read_csv(mixture_path, sep = mixture_separator).iloc[:,:-1]

In [None]:
# Discard every row that contains at least one missing value.
mixture_time_data = mixture_time_data.dropna()

In [None]:
# Column labels: 'ppm' for the first column, then 't1', 't2', ... for each
# remaining column of the mixture table.
names = ['ppm', *(f't{k}' for k in range(1, mixture_time_data.shape[1]))]

In [None]:
# Rename the columns to 'ppm', 't1', 't2', ... so that a spectrum can be
# looked up by its time index.
mixture_time_data.columns = names

In [None]:
def load_spectrum(mixture_time_data, moment_of_time):
    """Build the spectrum of the mixture at a single time point.

    Parameters
    ----------
    mixture_time_data
        Table with a 'ppm' column and one intensity column per time point,
        named 't<k>'.
    moment_of_time
        Index k of the time point; it is formatted into the column name
        't<k>'.

    Returns
    -------
    NMRSpectrum
        Spectrum constructed from the (ppm, intensity) pairs of that column.
    """
    chemical_shifts = mixture_time_data['ppm']
    intensities = mixture_time_data[f't{moment_of_time}']
    peaks = list(zip(chemical_shifts, intensities))
    return NMRSpectrum(confs=peaks)

In [None]:
# IPython magic: use matplotlib's interactive "notebook" backend for the plots below.
%matplotlib notebook

In [None]:
# Overlay the mixture spectra at time points 1 and 1000.
# The x axis of the current axes is inverted before plotting.
plt.gca().invert_xaxis()
NMRSpectrum.plot_all(
    [load_spectrum(mixture_time_data, time_point) for time_point in (1, 1000)],
    profile=True,
)

#### Reagents

In [None]:
# ppm windows (lower bound, upper bound) used to isolate each reagent's signal:
# saccharose: (5.39173, 5.44305)
# alpha-glucose: (5.2178, 5.26134)
# beta-glucose: (4.62026, 4.67207)
# fructose: (3.97917, 4.01542)

In [None]:
# Reference ("reagent") spectra are cut out of the mixture spectrum at time
# point 1001, one fixed ppm window per reagent.
# NOTE(review): the overview plots use time points 1 and 1000 while the
# references are taken from 1001 — confirm this is the intended time point.
mix1001 = pd.DataFrame(load_spectrum(mixture_time_data, 1001).confs)

# Column 0 is used as the ppm axis and column 1 as the intensity
# (presumably the (ppm, intensity) pairs the spectrum was built from — TODO
# confirm that NMRSpectrum.confs keeps that layout).
# Series.between is inclusive on both ends and evaluates the whole column at
# once instead of calling a Python lambda per row.
reagent0 = mix1001[mix1001.iloc[:, 0].between(5.39173, 5.44305)]  # saccharose

reagent1 = mix1001[mix1001.iloc[:, 0].between(5.2178, 5.26134)]  # alpha-glucose

reagent2 = mix1001[mix1001.iloc[:, 0].between(4.62026, 4.67207)]  # beta-glucose

reagent3 = mix1001[mix1001.iloc[:, 0].between(3.97917, 4.01542)]  # fructose

reagents = [reagent0, reagent1, reagent2, reagent3]
reagents_sp = []

# Wrap every window in its own NMRSpectrum.
for reagent in reagents:
    ppm = reagent.iloc[:, 0]
    ints = reagent.iloc[:, 1]
    reagents_sp.append(NMRSpectrum(confs=list(zip(ppm, ints))))

# Clean up each reference spectrum in place: drop negative intensities first,
# then normalise.
for sp in reagents_sp:
    sp.trim_negative_intensities()
    sp.normalize()

In [None]:
# Compare the normalised mixture spectrum at time point 1 with the reagent
# reference spectra.
mix_to_plot = load_spectrum(mixture_time_data, 1)
mix_to_plot.normalize()

# The x axis of the current axes is inverted before plotting.
plt.gca().invert_xaxis()
NMRSpectrum.plot_all(
    [mix_to_plot, *reagents_sp],
    profile=True,
)

### Mixture

In [None]:
# Keep only the rows whose ppm falls inside one of the four reagent windows
# (bounds inclusive); all other rows are dropped from the mixture table.
# Series.between evaluates the whole column at once instead of calling a
# Python lambda per row.
mixture_time_data = mixture_time_data[
    mixture_time_data['ppm'].between(5.39173, 5.44305)    # saccharose
    | mixture_time_data['ppm'].between(5.2178, 5.26134)   # alpha-glucose
    | mixture_time_data['ppm'].between(4.62026, 4.67207)  # beta-glucose
    | mixture_time_data['ppm'].between(3.97917, 4.01542)  # fructose
]

In [None]:
# Baseline correction (disabled): subtract a constant offset of 0.0395 from every time column.
# for colname in ['t' + str(nb) for nb in range(1, mixture_time_data.shape[1])]:
#     mixture_time_data[colname] = mixture_time_data[colname].apply(lambda x: x-0.0395)

In [None]:
# Overlay time points 1 and 1000 again, now that the mixture table has been
# restricted to the reagent ppm windows.
plt.gca().invert_xaxis()
NMRSpectrum.plot_all(
    [load_spectrum(mixture_time_data, time_point) for time_point in (1, 1000)],
    profile=True,
)

### Figures

In [None]:
# Figure: the mixture at time point 1 (negative intensities trimmed, then
# normalised) together with the reagent reference spectra, drawn with
# profile=False.
mix_to_plot = load_spectrum(mixture_time_data, 1)
mix_to_plot.trim_negative_intensities()
mix_to_plot.normalize()

# The x axis of the current axes is inverted before plotting.
plt.gca().invert_xaxis()
NMRSpectrum.plot_all(
    [mix_to_plot, *reagents_sp],
    profile=False,
)

### Saving preprocessed spectra

In [None]:
# Write each reagent window to its own tab-separated file under data_path.
# NOTE(review): the tables saved here are the raw `reagentN` slices, not the
# trimmed and normalised `reagents_sp` spectra — confirm this is what the
# "preprocessed" files are meant to contain.
for file_name, table in [
    ('preprocessed_saccharose.csv', reagent0),
    ('preprocessed_alpha_glucose.csv', reagent1),
    ('preprocessed_beta_glucose.csv', reagent2),
    ('preprocessed_fructose.csv', reagent3),
]:
    np.savetxt(data_path + file_name, table, delimiter='\t')

In [None]:
# Save the region-filtered mixture table as tab-separated values.
# np.savetxt writes no header row by default, so the column names are not stored.
np.savetxt(data_path + 'preprocessed_mixture.csv', mixture_time_data, delimiter = '\t')