# Data Analysis of Blood Lactate Pyruvate calibration measurements

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import matplotlib
matplotlib.rcParams.update({'font.size': 12,'font.family':'serif','font.serif':['Computer Modern'],"text.usetex" : True,})

import ipywidgets as widgets
import datetime
import pandas as pd
from tqdm.auto import tqdm
import os
import nmrglue
import hypermri
import hypermri.utils.utils_anatomical as ut_anat
import hypermri.utils.utils_spectroscopy as ut_spec
import hypermri.utils.utils_fitting as ut_fitting
from scipy.optimize import curve_fit
from matplotlib import cm
from astropy.modeling import models, fitting

from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)
import matplotlib.dates as mdates
    
import sys
# define paths:
sys.path.append('../../')
import TEMPLATE
# get paths:
repopath, basepath, savepath = TEMPLATE.import_all_packages()

# Autoreload extension so that you don't have to restart the kernel every time something is changed in the hypermri or magritek folders
%load_ext autoreload
%autoreload 2

%matplotlib widget


def avg_temp(temp_array,window_size=5):
    """Return the moving average of ``temp_array`` over ``window_size`` samples.

    Only complete windows are averaged (no edge padding) and each mean is
    rounded to two decimals, so the returned list has
    ``len(temp_array) - window_size + 1`` entries and is empty when the
    window is longer than the input.
    """
    n_windows = len(temp_array) - window_size + 1
    return [
        round(np.sum(temp_array[start:start + window_size]) / window_size, 2)
        for start in range(n_windows)
    ]


### Import fitting functions

In [None]:
from hypermri.utils.utils_spectroscopy import find_npeaks as ut_find_npeaks
from hypermri.utils.utils_fitting import temperature_from_frequency

# 1. Defining Paths to data and loading temperature data

In [None]:
# Data directories of the two measurement sessions. The sub-directory names
# are empty strings here, so both paths are `basepath` plus a trailing separator.
dirpath1 = os.path.join(basepath, '')
dirpath2= os.path.join(basepath, '')

In [None]:
# Load the temperature log of each session; the results are later passed to
# fit_experiment_list_to_temperature as the temperature dataframe.
# NOTE(review): the third argument ((2*3600)+8 and 3608) looks like a time
# offset in seconds (about 2 h and 1 h) - confirm against load_pc_sam_temp_file.
temp_1 = hypermri.utils.utils_general.load_pc_sam_temp_file(dirpath1+'/.txt','',(2*3600)+8,False)
temp_2 = hypermri.utils.utils_general.load_pc_sam_temp_file(dirpath2+'/.txt','',3608,False)

# 2. Load Bruker data and select scans

In [None]:
# Open both session directories; the scans are picked from them by index below.
scans1 = hypermri.BrukerDir(dirpath1,verbose=False,keywords=['PVM_NRepetitions','PVM_ScanTime'])
scans2 = hypermri.BrukerDir(dirpath2,verbose=False,keywords=['PVM_NRepetitions','PVM_ScanTime'])

In [None]:
# Scan used from the first session
exp_1=scans1[5]

# Scans used from the second session
exp_2=scans2[6]
exp_3=scans2[7]
exp_4=scans2[8]
exp_5=scans2[9]

# One list per session; the order must match signal_reps1 / signal_reps2,
# which are indexed in parallel by fit_experiment_list_to_temperature.
exp_list1 = [exp_1]
exp_list2=[exp_2,exp_3,exp_4,exp_5]



### Quick look at scans to find the reps that have sufficient lactate and pyruvate signal strengths

In [None]:
hyper = exp_2

# Characters 1-10 of ACQ_time hold the date and characters 12-19 the time of day.
acq_time = hyper.acqp['ACQ_time']
print(datetime.datetime.strptime(acq_time[1:11]+'/'+acq_time[12:20],
                                 '%Y-%m-%d/%H:%M:%S'))

fig,ax=plt.subplots(1)
# Size the slider from the data instead of a hard-coded 350 repetitions, so it
# can neither index past the last spectrum of a shorter scan nor hide the
# later repetitions of a longer one.
@widgets.interact(rep=(0,len(hyper.spec)-1,1))
def update(rep=0):
    """Redraw the axis with the spectrum of repetition ``rep``."""
    ax.cla()
    ax.plot(hyper.ppm,hyper.spec[rep,:])

# 3. Secondary input data regarding samples

In [None]:
# enzyme solution pH after measurement
sample_pH = [6.75,7.27,6.97,7.04,6.95]

sig_1 = range(66,86)
sig_2 = range(46,82)
sig_3=range(47,71)
sig_4=range(50,65)
sig_5=range(56,71)


signal_reps1 = [sig_1]
signal_reps2=[sig_2,sig_3,sig_4,sig_5]

In [None]:
# Mean, standard deviation (NumPy default, ddof=0) and number of the pH values
np.mean(sample_pH),np.std(sample_pH),len(sample_pH)

# 4. Define Fitting Functions

In [None]:
def fit_experiment_list_to_temperature(experiment_list,signal_list,temperature_dataframe,plot_exp=0):
    """Fit the spectra of several experiments and align the peaks with temperature.

    Every experiment is fitted with ``ut_fitting.fit_data_pseudo_inv`` using the
    metabolites pyruvate, lactate and (pyruvate) hydrate. For each repetition
    listed in ``signal_list`` the fitted pyruvate and lactate positions are then
    written to the temperature sample whose ``EpochTime`` is closest to the
    acquisition time of that repetition (scan start + repetition * TR).

    Parameters
    ----------
    experiment_list : list
        Scans to fit. Each must provide ``complex_spec``, ``method`` and ``acqp``.
    signal_list : list
        One iterable of repetition indices per experiment, in the same order as
        ``experiment_list``.
    temperature_dataframe : pandas.DataFrame
        Temperature log with the columns ``Time``, ``EpochTime`` and
        ``Temperature``.
    plot_exp : int, optional
        Unused; kept so that existing calls stay valid.

    Returns
    -------
    tuple
        ``(final_output_dataframe, fit_freqs_all, fit_spectrums_all, fit_params,
        fit_freqs_all_ppm)``. The dataframe has one row per temperature sample
        with the columns ``Time``, ``EpochTime``, ``Temperature``, ``Pyruvate``,
        ``Lactate`` (Hz), ``Pyruvate_ppm``, ``Lactate_ppm``, ``PyrLacDiff`` and
        ``PyrLacDiff_ppm``; samples without a matching repetition stay NaN.
        The ``*_all`` lists hold one entry per experiment and ``fit_params`` is
        the parameter dict of the last fitted experiment.

    Raises
    ------
    ValueError
        If ``experiment_list`` is empty or ``signal_list`` has fewer entries
        than ``experiment_list``.
    """
    # Fail early with a clear message instead of an UnboundLocalError at the
    # return statement (empty input) or an IndexError inside the fit loop.
    if len(experiment_list) == 0:
        raise ValueError('experiment_list must contain at least one experiment')
    if len(signal_list) < len(experiment_list):
        raise ValueError('signal_list must provide the repetitions to fit for every experiment')

    metabs = ['pyruvate','lactate','hydrate']

    fit_spectrums_all = []
    fit_freqs_all = []
    fit_freqs_all_ppm=[]
    for experiment,repetitions_to_fit in zip(tqdm(experiment_list,desc='Fitting experiments'),signal_list):
        fit_params = {}

        fit_params["zoomfactor"] = 1.5
        fit_params["max_t2_s"] = 1
        fit_params["min_t2_s"] = 0.0
        fit_params["range_t2s_s"] = 0.2

        fit_params["metabs"] = metabs
        # repetitions with sufficient signal
        fit_params["fit_range_repetitions"] = repetitions_to_fit
        fit_params["range_freqs_Hz"] = 100
        fit_params["cut_off"] = 70
        fit_params["niter"] = 1 # number of iterations
        fit_params["npoints"] = 21 # number of tested points per iteration
        # NOTE(review): meaning of rep_fitting is not visible here - see def_fit_params
        fit_params["rep_fitting"] = 11
        fit_params["provided_dims"] = ["reps", "fid"]
        fit_params["use_all_cores"] = True

        # First pass: the returned dict supplies "freq_range_ppm" for the peak search.
        fit_params = ut_fitting.def_fit_params(fit_params=fit_params, data_obj=experiment)

        # Peak search on the first selected repetition: back to the time domain,
        # drop the first `cut_off` points, transform to a magnitude spectrum.
        first_spectrum = experiment.complex_spec[repetitions_to_fit[0],:]
        truncated_fid = np.fft.ifft(np.fft.ifftshift(first_spectrum))[fit_params["cut_off"]::]
        peak_indices = ut_spec.find_npeaks(input_data=np.abs(np.fft.fftshift(np.fft.fft(truncated_fid))),
                                           freq_range=fit_params['freq_range_ppm'],
                                           npeaks=len(metabs),
                                           find_peaks_params={'distance': 200},
                                           plot=False)

        fit_params["metabs_freqs_ppm"] = fit_params["freq_range_ppm"][peak_indices]
        # Overwrite the peak values since the hydrate peak is not found for some reason.
        # NOTE(review): this discards the result of the peak search above.
        fit_params["metabs_freqs_ppm"] = [161.6,174.3,170.8]
        fit_params["metabs_freqs_Hz"] = [None, None, None]

        # Re-apply the fit settings before the second def_fit_params pass.
        fit_params["niter"] = 1
        fit_params["npoints"] = 21
        fit_params["rep_fitting"] = 11

        fit_params["provided_dims"] = ["reps", "fid"]
        fit_params["use_all_cores"] = True

        fit_params = ut_fitting.def_fit_params(fit_params=fit_params, data_obj=experiment)

        fit_spectrums, _, fit_freqs, _, _  = ut_fitting.fit_data_pseudo_inv(input_data=experiment.complex_spec,
                                                                            data_obj=experiment,
                                                                            fit_params=fit_params,
                                                                            use_multiprocessing=True)

        fit_freqs_ppm = ut_spec.freq_Hz_to_ppm(freq_Hz=np.squeeze(fit_freqs), hz_axis=fit_params["freq_range_Hz"], ppm_axis=fit_params["freq_range_ppm"], ppm_axis_flipped=False)
        fit_spectrums_all.append(fit_spectrums)
        fit_freqs_all.append(fit_freqs)
        fit_freqs_all_ppm.append(fit_freqs_ppm)

    # Correlate the fit results with temperature:
    # the final dataframe starts as a copy of the temperature log columns.
    final_output_dataframe = pd.DataFrame(columns=['Time','EpochTime','Temperature','Pyruvate','Lactate'])

    final_output_dataframe['Time']=temperature_dataframe['Time']
    final_output_dataframe['Temperature']=temperature_dataframe['Temperature']
    final_output_dataframe['EpochTime']=temperature_dataframe['EpochTime']

    # NaN-filled buffer with one column per temperature sample. Rows:
    # 0 EpochTime, 1 Temperature, 2 pyruvate [Hz], 3 lactate [Hz],
    # 4 pyruvate [ppm], 5 lactate [ppm]
    output = np.full((6,len(temperature_dataframe['EpochTime'])),np.nan)
    output[0]=temperature_dataframe['EpochTime']
    output[1]=temperature_dataframe['Temperature']

    for current_scan,repetitions,scan_freqs,scan_freqs_ppm in zip(experiment_list,signal_list,fit_freqs_all,fit_freqs_all_ppm):
        # presumably PVM_RepetitionTime is in ms, converted to seconds here - confirm
        TR = current_scan.method['PVM_RepetitionTime']/1000
        # Scan start as seconds since January 1 1970 for easy comparison with
        # EpochTime. The parsed datetime is naive, so timestamp() interprets it
        # as local time.
        acq_time = current_scan.acqp['ACQ_time']
        seq_start_time_seconds = datetime.datetime.strptime(acq_time[1:11]+'/'+acq_time[12:20],
                                                            '%Y-%m-%d/%H:%M:%S').timestamp()
        scan_freqs_ppm = np.array(scan_freqs_ppm)
        for repetition in repetitions:
            current_rep_time = seq_start_time_seconds+repetition*TR
            # Nearest temperature sample; repetitions that map to the same
            # sample overwrite each other.
            current_time_index = np.argmin(np.abs(final_output_dataframe['EpochTime']-current_rep_time))
            current_frequencies = scan_freqs[0,0,0,0,repetition,0,:]
            current_frequencies_ppm = scan_freqs_ppm[repetition,:]
            output[2,current_time_index] = current_frequencies[0] # pyruvate
            output[3,current_time_index] = current_frequencies[1] # lactate
            output[4,current_time_index] = current_frequencies_ppm[0] # pyruvate
            output[5,current_time_index] = current_frequencies_ppm[1] # lactate

    # Write the result columns once, after all experiments have been processed.
    final_output_dataframe['Pyruvate']=output[2]
    final_output_dataframe['Lactate']=output[3]
    final_output_dataframe['Pyruvate_ppm']=output[4]
    final_output_dataframe['Lactate_ppm']=output[5]

    final_output_dataframe['PyrLacDiff']=np.abs(output[2]-output[3])
    final_output_dataframe['PyrLacDiff_ppm']=np.abs(output[4]-output[5])

    return final_output_dataframe,fit_freqs_all,fit_spectrums_all,fit_params,fit_freqs_all_ppm

def plot_fit_results(fit_spectrums_all,fit_freqs_all,signal_list,experiment_list,fit_params,plot_exp=0):
    """Interactively compare fitted and measured spectra of one experiment.

    Opens a figure with a slider over the repetitions of experiment
    ``plot_exp`` (from the first to the last entry of ``signal_list[plot_exp]``).
    For the selected repetition the summed fit components are drawn on top of
    the Fourier transform of the measured FID without its first ``cut_off``
    points, and the title shows the difference between the first two fitted
    frequencies.
    """
    experiment = experiment_list[plot_exp]
    freq_axis = ut_spec.get_freq_axis(experiment,cut_off=fit_params['cut_off'])
    fig,ax=plt.subplots(1)
    reps = signal_list[plot_exp]

    @widgets.interact(rep = (reps[0],reps[-1],1))
    def update(rep=reps[0]):
        ax.cla()
        # fitted spectrum: sum over the fit components of this repetition
        components = np.squeeze(fit_spectrums_all[plot_exp])[:,rep,:]
        ax.plot(freq_axis,np.real(np.sum(components,axis=1)))
        # measured spectrum from the truncated FID
        truncated_fid = experiment.fids[rep,:][fit_params["cut_off"]::]
        measured = np.fft.fftshift(np.fft.fft(truncated_fid))
        ax.plot(freq_axis,np.real(measured),c='k',alpha=0.3)
        fitted_freqs = np.squeeze(fit_freqs_all[plot_exp])
        ax.set_title((fitted_freqs[rep,0])-fitted_freqs[rep,1])


### Fit all spectra from all experiments

In [None]:
# Fit the first session and match the fitted peaks to its temperature log
final_output_dataframe1,fit_freqs_1_all,fit_spectrums_1_all,fit_params_1,fit_freqs_1_all_ppm = fit_experiment_list_to_temperature(exp_list1,signal_reps1,temp_1)

### Check if fits worked

In [None]:
# Inspect the fits of the first (and only) experiment of session 1
plot_fit_results(fit_spectrums_1_all,fit_freqs_1_all,signal_reps1,exp_list1,fit_params_1,0)

## Second round of experiments

In [None]:
# Fit the second session and match the fitted peaks to its temperature log
final_output_dataframe2,fit_freqs_2_all,fit_spectrums_2_all,fit_params_2,fit_freqs_2_all_ppm = fit_experiment_list_to_temperature(exp_list2,signal_reps2,temp_2)

In [None]:
# Inspect the fits of session 2; the last argument selects the experiment (index 2 -> exp_4)
plot_fit_results(fit_spectrums_2_all,fit_freqs_2_all,signal_reps2,exp_list2,fit_params_2,2)

### Check results

In [None]:
# Close the earlier figures, then plot the pyruvate-lactate difference of session 2 against temperature
plt.close('all')
final_output_dataframe2.plot.scatter('Temperature','PyrLacDiff')

# 5. Combine the dataframes from the two experiments

In [None]:
# Session 1: keep only the temperature samples that received a fitted
# pyruvate-lactate difference.
idx_both1 = final_output_dataframe1['PyrLacDiff'].notna().to_numpy()
temps = final_output_dataframe1['Temperature'].to_numpy()[idx_both1]

frequencies = final_output_dataframe1['PyrLacDiff'].to_numpy()[idx_both1]
pyr_frq1 = final_output_dataframe1['Pyruvate'].to_numpy()[idx_both1]
lac_frq1 = final_output_dataframe1['Lactate'].to_numpy()[idx_both1]
frequencies_ppm = final_output_dataframe1['PyrLacDiff_ppm'].to_numpy()[idx_both1]

# Session 2: same selection
idx_both2 = final_output_dataframe2['PyrLacDiff'].notna().to_numpy()
temps2 = final_output_dataframe2['Temperature'].to_numpy()[idx_both2]

frequencies2 = final_output_dataframe2['PyrLacDiff'].to_numpy()[idx_both2]
pyr_frq2 = final_output_dataframe2['Pyruvate'].to_numpy()[idx_both2]
lac_frq2 = final_output_dataframe2['Lactate'].to_numpy()[idx_both2]
frequencies_ppm2 = final_output_dataframe2['PyrLacDiff_ppm'].to_numpy()[idx_both2]

# Stack both sessions (session 1 first) into one table
combined_columns = {
    'Temperature': (temps, temps2),
    'Frequency': (frequencies, frequencies2),
    'Pyruvate': (pyr_frq1, pyr_frq2),
    'Lactate': (lac_frq1, lac_frq2),
    'Frequency_ppm': (frequencies_ppm, frequencies_ppm2),
}
combined_df = pd.DataFrame({column: np.concatenate(parts) for column, parts in combined_columns.items()})
combined_df

# 6. Plot results

In [None]:
fig,ax=plt.subplots(1,3,figsize=(10,3),tight_layout=True)
window=2
# Change of the moving-average temperature from one row to the next, used as colour.
# NOTE(review): combined_df only holds rows with a fit result and stacks two
# sessions, so this is a per-row change - confirm that the "per second" unit
# in the colorbar label applies.
temp_change = np.gradient(avg_temp(combined_df['Temperature'],window))
# The moving average is `window - 1` entries shorter than the data. Slicing by
# its length also works for window=1, where the former `[:-window+1]` became
# `[:0]` and selected nothing.
n_points = len(temp_change)
temperature = combined_df['Temperature'][:n_points]
scatter_style = dict(c=temp_change,cmap='coolwarm',s=8,vmin=-0.01,vmax=0.01)

im=ax[0].scatter(temperature,combined_df['Pyruvate'][:n_points],**scatter_style)

im2=ax[1].scatter(temperature,combined_df['Lactate'][:n_points],**scatter_style)

im3=ax[2].scatter(temperature,combined_df['Frequency'][:n_points],**scatter_style)

for axis in ax:
    axis.set_ylabel('Hz')
    axis.set_xlabel('T[°C]')

# All three panels share the same colour mapping; use the handle that belongs
# to the axis the colorbar is attached to.
fig.colorbar(im3,ax=ax[2],label=' dT/dt [°C/s]')

ax[0].set_title('Pyr')
ax[1].set_title('Lact')
ax[2].set_title('Difference')

fig.suptitle('Absolute peak positions')



# Fit

In [None]:
def lin_fun(x,M,k):
    """Evaluate the straight line with intercept ``M`` and slope ``k`` at ``x``."""
    slope_term = k*x
    return slope_term+M
plt.close('all')
timeframe=[0,-1]

import seaborn as sns

# Linear calibration of the pyruvate-lactate distance (in ppm) against temperature
y_fit_data = combined_df['Frequency_ppm']

x_fit_data = combined_df['Temperature']

coeff,pcov = curve_fit(lin_fun,x_fit_data,y_fit_data)
# 1-sigma uncertainties of intercept and slope
coeff_err = np.sqrt(np.diag(pcov))

# coefficient of determination
residuals = y_fit_data - lin_fun(x_fit_data, *coeff)
ss_res = np.sum(residuals**2)

ss_tot = np.sum((y_fit_data-np.mean(y_fit_data))**2)

r_squared = 1 - (ss_res / ss_tot)

fig,ax=plt.subplots(1,figsize=(4,3),tight_layout=True)

im = ax.scatter(x_fit_data,y_fit_data,label='Data')
# regression line with 99 % confidence band
sns.regplot(x=x_fit_data,y=y_fit_data,ci=99,ax=ax,scatter=False)

ax.plot(x_fit_data,lin_fun(x_fit_data,coeff[0],coeff[1]),color='r',label='Linear fit')

ax.set_xlabel('T [°C]')
# the fitted quantity is the ppm difference, so label the axis accordingly
ax.set_ylabel(r'$\vert f_{pyr}-f_{lac}\vert$ [ppm]')
ax.legend()
# Print the slope with an explicit sign; plain concatenation of the two numbers
# produced an unreadable title for a positive slope (e.g. "12.30.05*T").
ax.set_title(f'f(T)={coeff[0]:.3f}{coeff[1]:+.3f}*T'+r', $R^2$='+f'{r_squared:.3f}')
print(coeff_err)

fit_results_dir = {'fit fun structure':'f(T)=B*T+A',
                  'A (ppm)':coeff[0],
                  'B (ppm/K)':coeff[1],
                  'dA (ppm)':coeff_err[0],
                  'dB (ppm/K)':coeff_err[1],
                   'R2 (ppm)':r_squared
                  }



In [None]:
def lin_fun(x,M,k):
    """Evaluate the straight line with intercept ``M`` and slope ``k`` at ``x``."""
    slope_term = k*x
    return slope_term+M
plt.close('all')
timeframe=[0,-1]

import seaborn as sns

# Linear calibration of the pyruvate-lactate distance (in Hz) against temperature
y_fit_data = combined_df['Frequency']

x_fit_data = combined_df['Temperature']

coeff,pcov = curve_fit(lin_fun,x_fit_data,y_fit_data)
# 1-sigma uncertainties of intercept and slope
coeff_err = np.sqrt(np.diag(pcov))

# coefficient of determination
residuals = y_fit_data - lin_fun(x_fit_data, *coeff)
ss_res = np.sum(residuals**2)

ss_tot = np.sum((y_fit_data-np.mean(y_fit_data))**2)

r_squared = 1 - (ss_res / ss_tot)

fig,ax=plt.subplots(1,figsize=(4,3),tight_layout=True)

im = ax.scatter(x_fit_data,y_fit_data,label='Data')
# regression line with 99 % confidence band
sns.regplot(x=x_fit_data,y=y_fit_data,ci=99,ax=ax,scatter=False)

ax.plot(x_fit_data,lin_fun(x_fit_data,coeff[0],coeff[1]),color='r',label='Linear fit')

ax.set_xlabel('T [°C]')
ax.set_ylabel(r'$\vert f_{pyr}-f_{lac}\vert$ [Hz]')
ax.legend()
# Print the slope with an explicit sign; plain concatenation of the two numbers
# produced an unreadable title for a positive slope.
ax.set_title(f'f(T)={coeff[0]:.3f}{coeff[1]:+.3f}*T'+r', $R^2$='+f'{r_squared:.3f}')
print(coeff_err)

# Adds the Hz results to the dict created by the ppm fit cell, which therefore
# has to be run first.
fit_results_dir.update({'fit fun structure':'f(T)=B*T+A',
                  'A (Hz)':coeff[0],
                  'B (Hz/K)':coeff[1],
                  'dA (Hz)':coeff_err[0],
                  'dB (Hz/K)':coeff_err[1],
                   'R2 (Hz)':r_squared
                  })
#np.savez(savepath+'Blood_calibration_fit_function',fit_results_dir)


In [None]:
fig,ax=plt.subplots(1,figsize=(7,5),tight_layout=True)
window=2
# Change of the moving-average temperature from one row to the next, used as colour.
# NOTE(review): this is a per-row change of combined_df - confirm that the
# "per second" unit in the colorbar label applies.
temp_change = np.gradient(avg_temp(combined_df['Temperature'],window))
# The moving average is `window - 1` entries shorter than the data. Slicing by
# its length also works for window=1, where the former `[:-window+1]` became
# `[:0]` and selected nothing.
n_points = len(temp_change)

im=ax.scatter(combined_df['Temperature'][:n_points],combined_df['Frequency'][:n_points],
              c=temp_change,cmap='coolwarm',s=12,
              vmin=-0.01,vmax=0.01)

ax.set_ylabel('Hz')
ax.set_xlabel('T[°C]')

fig.colorbar(im,ax=ax,label=' dT/dt [°C/s]')

ax.set_title('Pyr-Lac')

fig.suptitle('Relative peak positions')


In [None]:
# export that data

In [None]:
# Timestamp of this export run.
# NOTE(review): now_str is built but not used below - the file name is fixed,
# so every run writes to the same file. Confirm whether the timestamp was
# meant to be part of the name.
now = datetime.datetime.now()
now_str = now.strftime("%Y_%m_%d-%H_%M_%S")
# Write the combined calibration table to an Excel file under savepath
combined_df.to_excel(savepath+'Blood_calibration_data.xlsx')