## Fitting drug response curves with sigmoid function

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
import os, sys
sys.path.insert(1, os.path.relpath("functions"))
from fitting import *
from plotting import *
R2_limit = 0.99

_FOLDER = "results/"
_FOLDER_2 = "data/"

## Fitting data

In [2]:
df_auc = pd.read_csv(_FOLDER+"filt_auc_02.csv")
df_1234 = pd.read_csv(_FOLDER+"filt_1234_02.csv")
drug_curves = pd.read_csv(_FOLDER_2+"normalised_dose_response_data.csv")
conc_columns= ["fd_num_"+str(i) for i in range(10)]
response_norm = ['norm_cells_'+str(i) for i in range(10)]

df_auc.shape, df_1234.shape, drug_curves.shape

((14084, 31), (2108, 30), (225384, 44))

In [3]:
functions = [
    "fsigmoid", 
    "sigmoid_2_param",
    "sigmoid_3_param",
    "sigmoid_4_param",
    "logistic_4_param",
    "ll4_4_param",
    "ll4R_4_param",
    "logLogist_3_param"
]

In [4]:
def compare_fitting(df_raw, fitting_functions_list):
    df_results = pd.DataFrame(columns= ["R2>0.9", "R2>0.95", "R2>0.99"])
    for fitting_function in fitting_functions_list:
        df = df_raw.copy()
        print("\n", fitting_function)
        r2, fit_param = fitting_column(df, df.index, x_columns=conc_columns, y_columns= response_norm,
                               fitting_function = fitting_function, default_param=True)
        df[fitting_function+"_r2"] = r2
        df[fitting_function] = fit_param
#         df= df[df[fitting_function+"_r2"]>0]
        print("R2>0:", df.shape)
        print("R2>", R2_limit, df[df[fitting_function+"_r2"]>R2_limit].shape[0])
        df_results.loc[fitting_function, "R2>0.9"] = df[df[fitting_function+"_r2"]>0.9].shape[0]
        df_results.loc[fitting_function, "R2>0.95"] = df[df[fitting_function+"_r2"]>0.95].shape[0]
        df_results.loc[fitting_function, "R2>0.99"] = df[df[fitting_function+"_r2"]>0.99].shape[0]
        print("Number of samples with fitting <0.1:", df[df[fitting_function+"_r2"]<0.1].shape[0])
        print("")
    return df, df_results

In [5]:
%%time
df, df_results = compare_fitting(df_auc, functions)
df_results.to_csv(_FOLDER+"fit_auc_02_compare.csv", index=False)
df.to_csv(_FOLDER+"filt_auc_02_fit.csv", index=False)

  0%|          | 21/14084 [00:00<01:07, 208.56it/s]


 fsigmoid


100%|██████████| 14084/14084 [00:48<00:00, 287.83it/s]
  0%|          | 0/14084 [00:00<?, ?it/s]

<function fsigmoid at 0x7f86482d6c80>
R2>0: (14067, 33)
R2> 0.99 1423
Number of samples with fitting <0.1: 4


 sigmoid_2_param


100%|██████████| 14084/14084 [01:22<00:00, 171.12it/s]
  0%|          | 24/14084 [00:00<00:59, 236.99it/s]

<function sigmoid_2_param at 0x7f86482d69d8>
R2>0: (14073, 33)
R2> 0.99 1388
Number of samples with fitting <0.1: 7


 sigmoid_3_param


100%|██████████| 14084/14084 [01:13<00:00, 190.34it/s]
  0%|          | 0/14084 [00:00<?, ?it/s]

<function sigmoid_3_param at 0x7f86482d6d08>
R2>0: (14055, 33)
R2> 0.99 2510
Number of samples with fitting <0.1: 9


 sigmoid_4_param


100%|██████████| 14084/14084 [02:37<00:00, 89.17it/s] 
  0%|          | 18/14084 [00:00<01:18, 180.00it/s]

<function sigmoid_4_param at 0x7f86482d6d90>
R2>0: (13158, 33)
R2> 0.99 3620
Number of samples with fitting <0.1: 616


 logistic_4_param


100%|██████████| 14084/14084 [01:21<00:00, 172.64it/s]
  0%|          | 9/14084 [00:00<02:39, 88.38it/s]

<function logistic_4_param at 0x7f86482d6f28>
R2>0: (14078, 33)
R2> 0.99 3635
Number of samples with fitting <0.1: 10


 ll4_4_param


100%|██████████| 14084/14084 [01:34<00:00, 148.60it/s]
  0%|          | 10/14084 [00:00<02:48, 83.74it/s]

<function ll4_4_param at 0x7f86482d6e18>
R2>0: (14076, 33)
R2> 0.99 3636
Number of samples with fitting <0.1: 11


 ll4R_4_param


100%|██████████| 14084/14084 [01:17<00:00, 181.57it/s]
  0%|          | 16/14084 [00:00<01:28, 158.76it/s]

<function ll4R_4_param at 0x7f86482d6ea0>
R2>0: (14081, 33)
R2> 0.99 3622
Number of samples with fitting <0.1: 12


 logLogist_3_param


100%|██████████| 14084/14084 [01:35<00:00, 147.32it/s]

<function logLogist_3_param at 0x7f86482e0048>
R2>0: (14072, 33)
R2> 0.99 3027
Number of samples with fitting <0.1: 0

CPU times: user 8min 54s, sys: 31.2 s, total: 9min 26s
Wall time: 11min 53s





In [6]:
%%time
df, df_results_2 = compare_fitting(df_1234, functions)
df_results_2.to_csv(_FOLDER+"fit_1234_compare.csv", index=False)
df.to_csv(_FOLDER+"filt_1234_fit.csv", index=False)

  0%|          | 0/2108 [00:00<?, ?it/s]


 fsigmoid


100%|██████████| 2108/2108 [00:03<00:00, 574.79it/s]
  3%|▎         | 59/2108 [00:00<00:03, 587.30it/s]

<function fsigmoid at 0x7fc8e9ad59d8>
R2>0: (2108, 32)
R2> 0.99 921
Number of samples with fitting <0.1: 0


 sigmoid_2_param


100%|██████████| 2108/2108 [00:03<00:00, 542.88it/s]
  2%|▏         | 42/2108 [00:00<00:04, 417.69it/s]

<function sigmoid_2_param at 0x7fc8e9ad5730>
R2>0: (2108, 32)
R2> 0.99 921
Number of samples with fitting <0.1: 0


 sigmoid_3_param


100%|██████████| 2108/2108 [00:05<00:00, 398.65it/s]
  1%|▏         | 29/2108 [00:00<00:07, 284.70it/s]

<function sigmoid_3_param at 0x7fc8e9ad5a60>
R2>0: (2108, 32)
R2> 0.99 1220
Number of samples with fitting <0.1: 0


 sigmoid_4_param


100%|██████████| 2108/2108 [00:06<00:00, 335.71it/s]
  1%|▏         | 31/2108 [00:00<00:06, 305.84it/s]

<function sigmoid_4_param at 0x7fc8e9ad5ae8>
R2>0: (2090, 32)
R2> 0.99 1531
Number of samples with fitting <0.1: 17


 logistic_4_param


100%|██████████| 2108/2108 [00:06<00:00, 333.81it/s]
  1%|▏         | 28/2108 [00:00<00:07, 273.74it/s]

<function logistic_4_param at 0x7fc8e9ad5c80>
R2>0: (2108, 32)
R2> 0.99 1473
Number of samples with fitting <0.1: 0


 ll4_4_param


100%|██████████| 2108/2108 [00:07<00:00, 269.47it/s]
  1%|▏         | 27/2108 [00:00<00:07, 266.27it/s]

<function ll4_4_param at 0x7fc8e9ad5b70>
R2>0: (2108, 32)
R2> 0.99 1472
Number of samples with fitting <0.1: 0


 ll4R_4_param


100%|██████████| 2108/2108 [00:07<00:00, 287.35it/s]
  1%|▏         | 31/2108 [00:00<00:06, 309.32it/s]

<function ll4R_4_param at 0x7fc8e9ad5bf8>
R2>0: (2108, 32)
R2> 0.99 1469
Number of samples with fitting <0.1: 2


 logLogist_3_param


100%|██████████| 2108/2108 [00:07<00:00, 284.29it/s]

<function logLogist_3_param at 0x7fc8e9ad5d08>
R2>0: (2108, 32)
R2> 0.99 1465
Number of samples with fitting <0.1: 0

CPU times: user 45.5 s, sys: 2.07 s, total: 47.6 s
Wall time: 48.3 s





In [7]:
%%time
df, df_results_3 = compare_fitting(drug_curves, functions)
df_results_3.to_csv(_FOLDER+"fit_no_filt_compare.csv", index=False)
df.to_csv(_FOLDER+"filt_fit.csv", index=False)

  0%|          | 0/225384 [00:00<?, ?it/s]


 fsigmoid


100%|██████████| 225384/225384 [07:26<00:00, 505.23it/s] 


<function fsigmoid at 0x7fc8e9ad59d8>
R2>0: (154078, 46)
R2> 0.99 6639
Number of samples with fitting <0.1: 14455



  0%|          | 0/225384 [00:00<?, ?it/s]


 sigmoid_2_param


100%|██████████| 225384/225384 [07:11<00:00, 521.93it/s] 


<function sigmoid_2_param at 0x7fc8e9ad5730>
R2>0: (147301, 46)
R2> 0.99 6586
Number of samples with fitting <0.1: 10227



  0%|          | 0/225384 [00:00<?, ?it/s]


 sigmoid_3_param


100%|██████████| 225384/225384 [13:48<00:00, 271.89it/s] 


<function sigmoid_3_param at 0x7fc8e9ad5a60>
R2>0: (189861, 46)
R2> 0.99 11404
Number of samples with fitting <0.1: 26775



  0%|          | 0/225384 [00:00<?, ?it/s]


 sigmoid_4_param


100%|██████████| 225384/225384 [21:03<00:00, 178.44it/s] 


<function sigmoid_4_param at 0x7fc8e9ad5ae8>
R2>0: (168543, 46)
R2> 0.99 16322
Number of samples with fitting <0.1: 35444



  0%|          | 0/225384 [00:00<?, ?it/s]


 logistic_4_param


100%|██████████| 225384/225384 [25:15<00:00, 148.67it/s] 


<function logistic_4_param at 0x7fc8e9ad5c80>
R2>0: (182648, 46)
R2> 0.99 18227
Number of samples with fitting <0.1: 11737



  0%|          | 0/225384 [00:00<?, ?it/s]


 ll4_4_param


100%|██████████| 225384/225384 [31:53<00:00, 117.81it/s] 


<function ll4_4_param at 0x7fc8e9ad5b70>
R2>0: (182050, 46)
R2> 0.99 18250
Number of samples with fitting <0.1: 13623



  0%|          | 0/225384 [00:00<?, ?it/s]


 ll4R_4_param


100%|██████████| 225384/225384 [5:30:56<00:00, 11.35it/s]      


<function ll4R_4_param at 0x7fc8e9ad5bf8>
R2>0: (183161, 46)
R2> 0.99 18190
Number of samples with fitting <0.1: 14135



  0%|          | 0/225384 [00:00<?, ?it/s]


 logLogist_3_param


100%|██████████| 225384/225384 [4:10:29<00:00, 15.00it/s]        


<function logLogist_3_param at 0x7fc8e9ad5d08>
R2>0: (167680, 46)
R2> 0.99 16457
Number of samples with fitting <0.1: 15654

CPU times: user 2h 32min 12s, sys: 10min 25s, total: 2h 42min 38s
Wall time: 11h 28min 19s


In [None]:
df_results

In [6]:
df_results.sort_values("R2>0.99")

Unnamed: 0,R2>0.9,R2>0.95,R2>0.99
sigmoid_2_param,10353,7215,1388
fsigmoid,10396,7280,1423
sigmoid_3_param,11991,9378,2510
logLogist_3_param,12039,9724,3027
sigmoid_4_param,11204,9549,3620
ll4R_4_param,12421,10275,3622
logistic_4_param,12428,10286,3635
ll4_4_param,12428,10287,3636


In [None]:
df_results_2

In [9]:
df_results_2.sort_values("R2>0.99")

Unnamed: 0,R2>0.9,R2>0.95,R2>0.99
fsigmoid,2058,1874,921
sigmoid_2_param,2058,1874,921
sigmoid_3_param,2098,2031,1220
logLogist_3_param,2100,2051,1465
ll4R_4_param,2099,2057,1469
ll4_4_param,2103,2063,1472
logistic_4_param,2103,2063,1473
sigmoid_4_param,2070,2030,1531


In [None]:
df_results_3

In [10]:
df_results_3.sort_values("R2>0.99")

Unnamed: 0,R2>0.9,R2>0.95,R2>0.99
sigmoid_2_param,53016,32148,6586
fsigmoid,53108,32254,6639
sigmoid_3_param,70277,46814,11404
sigmoid_4_param,65700,49231,16322
logLogist_3_param,75034,54864,16457
ll4R_4_param,78654,57503,18190
logistic_4_param,78979,57688,18227
ll4_4_param,78843,57668,18250
