In [10]:
n_cpu = 8
PARALLEL = True

In [2]:
import pandas as pd 
import numpy as np
from scipy.optimize.minpack import curve_fit
import matplotlib.pyplot as plt
from joblib import Parallel, delayed

%matplotlib inline

In [4]:
def curve_fit_coeffs(
    exp_conc_protein: pd.Series, net_mfi_protein: pd.Series
) -> np.ndarray:
    """Find a curve fit using the standard concentrations and MFIs from the panel xPonent file.
    Args:
        exp_conc_protein: expected concentrations of the standards
        net_mfi_protein: net MFIs of the standards
    Returns: numpy array of the curve fit coefficients
    """
    min_mfi = net_mfi_protein.min()
    max_mfi = net_mfi_protein.max()
    p0 = [min_mfi, 1, exp_conc_protein.median(), max_mfi, 1]
    xdata = list(exp_conc_protein)
    ydata = list(net_mfi_protein)
    coeffs, _ = curve_fit(
        f=logistic5p,
        xdata=xdata,
        ydata=ydata,
        p0=p0,
        sigma=ydata,
        method="lm",
        maxfev=int(1e5),
        absolute_sigma=True,
    )
    return coeffs


def logistic5p(x: float, A: float, B: float, C: float, D: float, F: float) -> float:
    """5-parameter logistic equation"""    
    return D + (A - D) / ((1 + (x / C) ** B) ** F)


def inv_logistic5p(y: float, A: float, B: float, C: float, D: float, F: float) -> float:
    """Inverse of the 5-parameter logistic equation"""
    return C * (((((A - D) / (y - D)) ** (1.0 / F)) - 1.0) ** (1.0 / B))


def parallel_runner(sub_df):
    sub_df = sub_df.loc[~sub_df.isna()["net_mfi"].values]
    estimated_coefficients = curve_fit_coeffs(sub_df["standard_expected_concentration"], sub_df["net_mfi"])
    inferred_concentrations = np.array([inv_logistic5p(net_mfi, *estimated_coefficients) for net_mfi in sub_df["net_mfi"]])
    sub_df["inferred_concentration"] = inferred_concentrations
    return sub_df


## Import processed data 

This file `/home/ddhillon/projects/beta-av-testing/data/notebooks/process-pseudo-guardbanding/standard-curve

In [5]:
missing_df = pd.read_csv('/home/ddhillon/projects/beta-av-testing/data/processed/std-curve-replicate-analysis/missing-sim-data.csv')
original_df = pd.read_csv('/home/ddhillon/projects/beta-av-testing/data/processed/std-curve-replicate-analysis/original_mod_data.csv')


In [11]:
if PARALLEL:
    result_df = Parallel(n_cpu)(delayed(parallel_runner)(sub_df) for _, sub_df in missing_df.groupby(["batch", "assay", "combs"]))
else:
    result_dfs = []
    for i, (_, sub_df) in enumerate(missing_df.groupby(["batch", "assay", "combs"])):
        result_df = parallel_runner(sub_df)
        result_dfs.append(result_df)

missing_result_df = pd.concat(result_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [63]:
if PARALLEL:
    result_df = Parallel(n_cpu)(delayed(parallel_runner)(sub_df) for _, sub_df in original_df.groupby(["batch", "assay"]))
else:
    result_dfs = []
    for _, sub_df in original_df.groupby(["batch", "assay"]):
        result_df = parallel_runner(sub_df)
        result_dfs.append(result_df)

original_result_df = pd.concat(result_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [7]:
result_dfs

[      combs       stds  iter xponent_id  assay  rep_num  median_mfi  \
 0        x1  Standard1     1  Standard1  FLT3L        1     20019.0   
 1        x1  Standard1     1  Standard1  FLT3L        2     26646.0   
 486      x1  Standard1     1  Standard2  FLT3L        1      4726.0   
 487      x1  Standard1     1  Standard2  FLT3L        2      5649.0   
 488      x1  Standard1     1  Standard2  FLT3L        3      6186.0   
 ...     ...        ...   ...        ...    ...      ...         ...   
 27417    x1  Standard6     6  Standard4  FLT3L        4       499.0   
 27430    x1  Standard6     6  Standard5  FLT3L        1       282.0   
 27431    x1  Standard6     6  Standard5  FLT3L        2       268.0   
 27432    x1  Standard6     6  Standard5  FLT3L        3       301.0   
 27433    x1  Standard6     6  Standard5  FLT3L        4       286.0   
 
        standard_expected_concentration    calc_conc  pct_recovery  \
 0                           2840.00000  2578.323672         90.