# Manual Integration Workbook

* Run this workbook once for each sample.
* Change `path_samples` (cell 2) to the folder where you unzipped the data on your local machine.
* In cell 5, set `sample_name` and the `m` and `c` parameters (gradient and intercept, respectively), then run cell 6 to check the visualization. Repeat cells 5 and 6 as many times as you like until you are happy with the fit, then run cell 7 (the last cell) to calculate the concentration from the area under the curve (AUC).

In [None]:
# CELL 1

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as patches

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 50)
import os
import json
import time
import re
import math
import pickle

import sys
sys.path.append("/Users/dteng/Documents/bin/nmr_utils/")
from nmr_targeted_utils import *
from nmr_fitting_funcs import *


In [None]:
# CELL 2

# ===== params =====
# CSV holding the STD (standard) template spectrum.
template_path = "lproline_ph3.csv"
# Folder containing the per-sample CSVs. Change as required; use the full path,
# which is easiest obtained from the topbar in Win Explorer.
path_samples = "/Users/dteng/Documents/zdata/nmr/J202208B_pro_survey/manual_integ_20240112_pkg/manual_integ_20240112_csvs"
# CSV of gradient data (its "V1" column is used for the concentration estimate).
bs_grad_path = "bootstrap_results_12sep2023.csv"
# Pickle holding the pre-computed blue_m1_dict (keyed by sample name).
blue_m1_pkl_path = "blue_m1_dict_20240112.pkl"

# Each entry is a pair of ppm bounds for one multiplet; entry 1 is the one used
# to cut the red (std) trace out of the template.
multiplets_ls = [
    [1.9, 2.15],
    [2.295, 2.403],
    [3.25, 3.5],
    [4.1, 4.2],
]
# Passed to adjust_to_ref_peak when loading the template
# (presumably ppm windows around the reference peak -- TODO confirm).
ref_pk_window = [-0.02, 0.02]
ref_pk_tolerance_window = [0, 0]
# Padding used by the (admin-only) 1D std search.
search_region_padding_size = 0.02

# ========== load data ==========
# load STD template(s); the raw template is passed through the project helper
# adjust_to_ref_peak together with the two reference-peak windows
template_df = pd.read_csv(template_path)
template_df = adjust_to_ref_peak(template_df, ref_pk_window, ref_pk_tolerance_window)

# load blue_m1_dict
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only open pickle files that come from a trusted source.
with open(blue_m1_pkl_path, 'rb') as f:
    blue_m1_dict = pickle.load(f)

# load sample data: one DataFrame per CSV in path_samples, keyed by the file
# name without its extension.
# The check is on the real extension, so files whose name merely contains
# ".csv" (e.g. "x.csv.bak") are skipped, and a ".csv" appearing inside the
# stem is not stripped out of the key.
df_dict = {}
for fn in os.listdir(path_samples):
    k, ext = os.path.splitext(fn)
    if ext == ".csv":
        df_dict[k] = pd.read_csv(os.path.join(path_samples, fn))

# load gradient data
grad_df = pd.read_csv(bs_grad_path)

# get reds: the rows of the std template whose ppm lies strictly inside the
# bounds of multiplet 1 (bounds themselves excluded)
m1_ppm_lo = min(multiplets_ls[1])
m1_ppm_hi = max(multiplets_ls[1])
inside_m1 = (template_df["ppm"] > m1_ppm_lo) & (template_df["ppm"] < m1_ppm_hi)
red_dt = template_df.copy().loc[inside_m1]


In [None]:
# CELL 3

# user: ignore this cell. Admin only
# ===== run 1d_std_search =====
# results_dict = {}
# for k in sorted(list(df_dict.keys())):
#     target_df = df_dict[k]
#     results_dict[k] = do_1d_std_search(
#         query_df=template_df,
#         target_df=target_df,
#         multiplets_ls=multiplets_ls,
#         search_region_padding_size=search_region_padding_size
#     )

# # build blue_m1_dict (one entry per sample k) from the search results
# blue_m1_dict = get_blue_m1_dict(results_dict, 
#                                 df_dict,
#                                 mcoords=multiplets_ls[1]
#                                )



In [None]:
# CELL 4

# list every loaded sample name in sorted order, one per line,
# ready to copy-paste into cell 5
for sample_key in sorted(df_dict):
    print(sample_key)

### Toggle Constant And Coefficient

Change the gradient (`m`) and intercept (`c`) as needed, iterating until the blue (sample) and red (`std`) traces line up in the plot produced by cell 6.

In [None]:
# CELL 5

# sample to inspect (copy a name from the list printed by cell 4)
sample_name = "AF65299"

# gradient: multiplier applied to the std intensities; change as required
m = 0.3

# intercept: constant added to the scaled std intensities; change as required
c = 1e6

In [None]:
# CELL 6

# Overlay the chosen sample (blue) and the std template scaled by m and
# shifted by c (red), so the fit can be judged by eye.
blue_m1_dt = blue_m1_dict[sample_name].copy()
fitted_std = (red_dt.intensity.values * m) + c

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    blue_m1_dt.ppm.values,
    blue_m1_dt.intensity.values,
    color="steelblue",
    label=sample_name,
)
ax.plot(red_dt.ppm.values, fitted_std, color="indianred", label="std")
ax.legend()

plt.show()

In [None]:
# CELL 7

# print out AUC: the sum of the std intensities scaled by m, plus c.
# NOTE(review): c is added once to the total here, whereas the plot in cell 6
# adds c to every point -- confirm this is the intended AUC definition.
scaled_intensity = red_dt.intensity.values * m
auc = np.sum(scaled_intensity) + c
print(auc)

# One concentration estimate per "V1" value in the gradient table
# (presumably one per bootstrap replicate -- TODO confirm); report their
# mean and sample standard deviation (ddof=1).
conc_ls = grad_df["V1"].values * auc
conc_mean = np.average(conc_ls)
conc_sd = np.std(conc_ls, ddof=1)
print(f"conc: average = {conc_mean}, sd = {conc_sd}")