In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, sys, time
import argparse
import tarfile
import re
import scipy.stats as stats

##impor  BMD files from directory
##TODO: combine bMD processing to single file, combine LPR processing to single file
import bmd_analysis_morpho as bmd
import bmd_analysis_LPR_7_PAH_t0_t239 as bmd_LPR
import format_LPR_input as format_LPR
import format_morpho_input as format_morpho

import statsmodels.formula.api as smf
import statsmodels.api as sm

/Users/degn400/Desktop/Git_Repos/srpAnalytics/zfBmd
/Users/degn400/Desktop/Git_Repos/srpAnalytics/zfBmd


# Morpho Step 1: Format File

In [2]:
# This is the format_morpho_input function

##############################################
## READ FILE AND SUBSET TO RELEVANT COLUMNS ##
##############################################

# Read morphology file 
df_morph = pd.read_csv('./test_files/7_PAH_zf_morphology_data_2020NOV11_tall_3756.csv', header = 0)

# List relevant column names
relevant_columns = ['chemical.id', 'conc', 'plate.id', 'well', 'endpoint', 'value']

# The input file must absolutely have these columns, no exceptions 
if all(col in df_morph.columns for col in relevant_columns) == False:
    sys.exit(print("The input file", mfiles, "must have the columns:", ', '.join(relevant_columns)))

# Keep only relevant columns
df_morph = df_morph.loc[:,relevant_columns]

##################################
## SUBSET TO RELEVANT ENDPOINTS ##
##################################

# List the relevant endpoints, which is different for BRAIN samples  
if "BRAI" in list(df_morph["endpoint"].unique()):
    relevant_endpoints = ['AXIS', 'BRAI', 'CFIN', 'CIRC', 'DNC_', 'DP24', 'EYE_', 'JAW_', 'MO24', 
                          'MORT', 'NC24', 'NC__', 'OTIC', 'PE__', 'PFIN', 'PIG_', 'SM24', 'SNOU', 
                          'SOMI', 'SWIM', 'TRUN', 'TR__', 'YSE_']
else:
    relevant_endpoints = ['AXIS', 'BRN_', 'CRAN', 'DNC_', 'DP24', 'EDEM', 'LTRK', 'MO24', 'MORT', 
                          'MUSC', 'NC__', 'SKIN','SM24', 'TCHR']
    
# Subset down to the relevant endpoints 
df_morph = df_morph[df_morph["endpoint"].isin(relevant_endpoints)]

###########################
## ADD MISSING ENDPOINTS ##
###########################

def new_endpoint(endpoints, new_name):
    """
    Generate a new endpoint which is a binary "or" of other endpoints,
    meaning that if there is a 1 in any of the other endpoints, the 
    resulting endpoint is a 1. Otherwise, it is 0 unless the other 
    endpoints are all NA. Then the final value is NA.
    
    Attributes
    ----
    endpoints: A list of column names, as strings, to binary "or". 
    new_name: The name of the new endpoint. 
    
    """
    sub_df = df_morph[df_morph["endpoint"].isin(endpoints)]
    sub_df["endpoint"] = new_name
    sub_df = sub_df.groupby(by = ["chemical.id", "conc", "plate.id", "well", "endpoint"], as_index = False).sum()
    sub_df['value'].values[sub_df['value'] > 1] = 1 
    return(sub_df)
    
# New endpoints to add is a smaller list if the sample is not from BRAIN
if "BRAI" in list(df_morph["endpoint"].unique()):
    
    df_morph = pd.concat(
        [new_endpoint(['MO24','DP24','SM24','NC24'], 'ANY24'),
         new_endpoint(['MORT', 'YSE_', 'AXIS', 'EYE_', 'SNOU', 'JAW_', 'OTIC', 'PE__', 'BRAI', 
                      'SOMI', 'PFIN', 'CFIN', 'PIG_', 'CIRC', 'TRUN', 'SWIM', 'NC__', 'TR__', 
                      'ANY24'], 'ANY120'),
         new_endpoint(['MO24','MORT'], 'TOT_MORT'),
         new_endpoint(['DP24','SM24','NC24', 'YSE_', 'AXIS', 'EYE_', 'SNOU', 'JAW_', 'OTIC', 'PE__', 
                      'BRAI', 'SOMI', 'PFIN', 'CFIN', 'PIG_', 'CIRC','TRUN', 'SWIM', 'NC__', 'TR__'], 'ALL_BUT_MORT'),
         new_endpoint(['BRAI','OTIC','PFIN'], 'BRN_'),
         new_endpoint(['EYE_', 'SNOU', 'JAW_'], 'CRAN'),
         new_endpoint(['YSE_','PE__'], 'EDEM'),
         new_endpoint(['TRUN','CFIN'], 'LTRK'),
         new_endpoint(['CIRC','SWIM','SOMI'], 'MUSC'),
         new_endpoint(['PIG_'], 'SKIN'),
         new_endpoint(['TR__'], 'TCHR'),
         df_morph]
    )
    
else:
    
    df_morph = pd.concat(

        # 1. Add any effect at 24hrs (combination of MO24, DP24 and SM24) 
        [new_endpoint(['MO24','DP24','SM24'], 'ANY24'),

        # 2. Any effect within 5 days (combination of all measurements at both time points)
        new_endpoint(['AXIS', 'BRN_', 'CRAN', 'EDEM', 'LTRK', 'MORT', 'MUSC', 'NC__', 'SKIN', 'TCHR', 'ANY24'], 'ANY120'),

        # 3. Total mortality (MO24 + MORT) 
        new_endpoint(['MO24','MORT'], 'TOT_MORT'),

        # 4. Any effect except mortality (#2 minus MO24 and MORT)
        new_endpoint(['AXIS', 'BRN_', 'CRAN', 'DP24', 'EDEM', 'LTRK', 'MUSC', 'NC__', 'SKIN', 'SM24', 'TCHR'], 'ALL_BUT_MORT'),
        
        # Add original dataframe
        df_morph]
    )
    
###########################################
## CALCULATE VARIABLES FOR DOSE RESPONSE ##
###########################################

# Create groups of each chemical id, concentration, and plate id
plate_groups = df_morph.drop(["well"], 1).groupby(by = ["chemical.id", "conc", "plate.id", "endpoint"], as_index = False)

# Get the number of samples per group
num_tot_samples = plate_groups.size().rename(columns = {"size": "num.tot"})

# Get the number of non-na samples per groups
num_nonna = plate_groups.count().rename(columns = {"value": "num.nonna"})

# Get the number affected
num_affected = plate_groups.sum().rename(columns = {"value": "num.affected"})

# Merge to create missingness dataframe
plate_groups = pd.merge(pd.merge(num_tot_samples, num_nonna), num_affected)

# Create IDs of chemical.id, plate.id, and endpoint in plate_groups 
ids = []
for row in range(len(plate_groups)):
    ids.append(str(plate_groups["chemical.id"][row]) + " " + str(plate_groups["plate.id"][row]) + " " + str(plate_groups["endpoint"][row]))
plate_groups["ids"] = ids

#####################################################################
## REMOVE VARIABLES WITH HIGH MISSINGNESS IN BASELINE MEASUREMENTS ##
#####################################################################

# Identify 0 (baseline) concentrations with high missingness (greater than 50% missing or less than 50% non-missing)
missingness = plate_groups.loc[plate_groups["conc"] == 0]
missingness["keep"] = missingness["num.nonna"] / missingness["num.tot"] > 0.5 # TODO: Add a report of what was removed --> txt file "nothing removed"

# Identify plates to keep 
tokeep = missingness.loc[missingness["keep"]]["ids"].tolist()
plate_groups = plate_groups[plate_groups["ids"].isin(tokeep)]

# Stop if everything gets removed
if len(plate_groups) == 0:
    sys.exit("Everything was removed with the 50% missingness filter")

#######################################
## REGROUP WITHOUT PLATE IDS AND SUM ##
#######################################

# First, remove plate.id and ids column
chemical_groups = plate_groups.drop(columns = ["plate.id", "ids"])

# Group by chemical.id, concentration, and endpoint. Then, sum the results. 
chemical_groups = chemical_groups.groupby(by = ["chemical.id", "conc", "endpoint"]).sum().reset_index()

############################
## RETURN FORMATTED TABLE ##
############################
chemical_groups


Unnamed: 0,chemical.id,conc,endpoint,num.tot,num.nonna,num.affected
0,3756,0.0,ALL_BUT_MORT,36,36,2.0
1,3756,0.0,ANY120,36,36,2.0
2,3756,0.0,ANY24,36,36,4.0
3,3756,0.0,AXIS,36,32,2.0
4,3756,0.0,BRN_,36,32,0.0
...,...,...,...,...,...,...
139,3756,100.0,NC__,36,30,0.0
140,3756,100.0,SKIN,36,30,0.0
141,3756,100.0,SM24,36,32,0.0
142,3756,100.0,TCHR,36,30,0.0


# Morpho Step 2: Generate Dose Response and Write Flags

In [3]:
# This is the generate_dose_response function

dose_response = chemical_groups

'''This function performs feasibility analysis
for dose response data. The value returned is a
flag indicating data quality as defined below:

0: Not enough dose groups for BMD analysis. BMD analysis not performed. BAD.
1: No trend detected in dose-response data. BMD analysis not performed. BAD. 
2: Good dose-response data. BMD analysis is performed. GOOD.
3: Dose-response data quality poor. BMD analysis might be unreliable. GOOD. 
4: Data resolution poor. BMD analysis might be unreliable. MODERATE.
5: Negative correlation. 

'''

########################
## CALCULATE RESPONSE ##
########################

# Add an id column
ids = []
for row in range(len(dose_response)):
    ids.append(str(dose_response["chemical.id"][row]) + " " + str(dose_response["endpoint"][row]))
dose_response["ids"] = ids

# Response is the number affected over the number of embryos (non-na)
dose_response["frac.affected"] = dose_response["num.affected"] / dose_response["num.nonna"]

####################################
## GENERATE QUALITY CONTROL FLAGS ##
####################################

# Count the number of unique concentrations per chemical id and endpoint pairing 
BMD_Flags = dose_response.groupby(["chemical.id", "endpoint", "ids"])["conc"].nunique().reset_index().rename(columns = {"conc": "num.conc"})

# Add a flag category
BMD_Flags["flag"] = None

# If there are less than 3 points, the BMD flag is 0 - not enough dose groups
BMD_Flags["flag"].values[BMD_Flags["num.conc"] < 3] = 0

# Change dose response 
dose_response["conc"] = dose_response["conc"].astype('float') + 1e-15

# Calculate the spearman correlation
spear = dose_response[["chemical.id", "endpoint", "conc", "frac.affected"]].groupby(["chemical.id", "endpoint"]).corr(method = "spearman").unstack().iloc[:,1].reset_index()
spear = spear.set_axis(["chemical.id", "endpoint", "spearman"], axis = 1)

# Merge spearman to the BMD_Flags dataframe
BMD_Flags = BMD_Flags.merge(spear)

# If spearman correlation is below 0.2 or is NaN, the Flag is 1 - no strong trend 
BMD_Flags["flag"].values[(BMD_Flags["spearman"] < 0.2) | (BMD_Flags["spearman"].isna())] = 1

# If the correlation is above 0.25 or below 0.8, run a t-test
BMD_Flags["run.ttest"] = (BMD_Flags["spearman"] >= 0.20) & (BMD_Flags["spearman"] <= 0.80)

# If the correlation is above 0.8, assign the flag at 2 - good
BMD_Flags["flag"].values[BMD_Flags["spearman"] > 0.8] = 2

# Run the t-test only where indicated 
ttest = dose_response[dose_response["ids"].isin(BMD_Flags[BMD_Flags["run.ttest"]]["ids"].to_list())][["ids", "frac.affected"]]
ttest = ttest.groupby("ids").apply(lambda df: stats.ttest_1samp(np.diff(df["frac.affected"]), 0)[1]).reset_index().rename(columns = {0:"ttest.pval"})

# Merge ttest results 
BMD_Flags = BMD_Flags.merge(ttest, on = "ids", how = "outer")

# A p-value of less than 0.05 gets a flag of 2 - good, from 0.05 to 0.32 gets a 3 - unreliable, 
# and greater than 0.32 gets very unreliable. 
BMD_Flags["flag"].values[BMD_Flags["ttest.pval"] <= 0.05] = 2
BMD_Flags["flag"].values[(BMD_Flags["ttest.pval"] > 0.05) & (BMD_Flags["ttest.pval"] <= 0.32)] = 3
BMD_Flags["flag"].values[BMD_Flags["ttest.pval"] > 0.32] = 4

BMD_Flags


Unnamed: 0,chemical.id,endpoint,ids,num.conc,flag,spearman,run.ttest,ttest.pval
0,3756,ALL_BUT_MORT,3756 ALL_BUT_MORT,8,4,0.739516,True,0.85284
1,3756,ANY120,3756 ANY120,8,4,0.566306,True,0.896837
2,3756,ANY24,3756 ANY24,8,4,0.727393,True,0.792745
3,3756,AXIS,3756 AXIS,8,4,0.357143,True,0.90014
4,3756,BRN_,3756 BRN_,8,1,,False,
5,3756,CRAN,3756 CRAN,8,4,0.357143,True,0.90014
6,3756,DNC_,3756 DNC_,8,4,0.247436,True,1.0
7,3756,DP24,3756 DP24,8,2,0.805118,False,
8,3756,EDEM,3756 EDEM,8,4,0.357143,True,0.90014
9,3756,LTRK,3756 LTRK,8,1,0.0,False,


# Morpho Step 3: Select and Run Models

In [4]:
import BMD_Analysis_Functions as baf
# This is a new select_and_run_models function which will take BMD_Flags and dose_response

# Set a global benchmark response variable
BMR = 0.1

###########################################
## CALCULATE VALUES FOR LOW QUALITY DATA ##
###########################################

# Subset dose response for auc, min, max, and auc_norm calculations
low_quality = dose_response[dose_response["ids"].isin(BMD_Flags[BMD_Flags["flag"].isin([0,1])]["ids"])].groupby("ids")

# Calculate low quality metrics and start new BMDS file 
BMDS = low_quality.apply(lambda df: np.trapz(df["frac.affected"], x = df["conc"])).reset_index().rename(columns = {0: "AUC"})
min_dose = low_quality[["ids", "conc"]].min("conc").reset_index().rename(columns = {"conc": "Min_Dose"})
max_dose = low_quality[["ids", "conc"]].max("conc").reset_index().rename(columns = {"conc": "Max_Dose"})
BMDS["AUC_Norm"] = BMDS["AUC"] / (max_dose["Max_Dose"] - min_dose["Min_Dose"])

# Add missing columns 
BMDS = BMDS.merge(min_dose).merge(max_dose).merge(BMD_Flags[["ids", "flag"]]).rename(columns = {"flag":"Data QC_Flag"})
BMDS[["Model", "BMD10", "BMDL", "BMD50", "BMD_Analysis_Flag", "BMD10_Flag", "BMD50_Flag"]] = np.nan

# Reorder columns
BMDS = BMDS[["ids", "Model", "BMD10", "BMDL", "BMD50", "AUC", "Min_Dose", "Max_Dose", "AUC_Norm", "Data QC_Flag", 
             "BMD_Analysis_Flag", "BMD10_Flag", "BMD50_Flag"]]

################
## RUN MODELS ##
################

# Define function to calculate p-values for fit
def calc_p_value(ID, CONVERGED, MODEL_DF):
    '''Return a p-value of model fit for each unique ID and Model dataframe pairing'''

    # If converged, return a value. Otherwise, return NA
    if (CONVERGED):

        # Get the non-na totals
        NonNATotals = dose_response[dose_response["ids"] == ID]["num.nonna"].tolist() 

        # Get the predicted values 
        PredictedValues = MODEL_DF[MODEL_DF["ids"] == ID]["Pred_Value"].tolist()[0]

        # Get the experimental values 
        ExperimentalValues = dose_response[dose_response["ids"] == ID]["frac.affected"].tolist()

        # Now, calculate the chi squared value
        ChiSquared = ((NonNATotals / (PredictedValues * (1 - PredictedValues))) * (ExperimentalValues - PredictedValues)**2).sum()

        # Calculate a p-value of fit 
        return(stats.chi2.sf(ChiSquared, len(NonNATotals) - len(MODEL_DF["Params"][0])))

    else:

        return(np.NAN)


# Subset data to data without low quality
Model_Data = dose_response[dose_response["ids"].isin(BMD_Flags[BMD_Flags["flag"].isin([2,3,4,5])]["ids"])].groupby("ids")

# Iterate through each model and track convergence, parameter estimates, bmd, predicted values, and llf

## Logistic Model ##  
Logistic = Model_Data.apply(lambda df: baf.Logistic(df[["conc", "num.affected", "num.nonna"]].astype('float').copy())).reset_index().rename(columns = {0:"Logistic"})
Logistic["Converged"] = [x.fit().mle_retvals['converged'] for x in Logistic["Logistic"]]
Logistic["Params"] = [x.fit().params for x in Logistic["Logistic"]] # Returned as alpha, beta
Logistic["BMD"] = [np.log((1 + np.exp(-Logistic["Params"][x][0])*BMR)/(1-BMR))/Logistic["Params"][x][1] for x in range(len(Logistic))]
Logistic["Pred_Value"] = [baf.logistic_fun(dose_response[dose_response["ids"] == Logistic["ids"][x]]["conc"], Logistic["Params"][x]).values for x in range(len(Logistic))]
Logistic["p-value"] = [calc_p_value(Logistic["ids"][x], Logistic["Converged"][x], Logistic) for x in range(len(Logistic))]

Logistic

Unnamed: 0,ids,Logistic,Converged,Params,BMD,Pred_Value,p-value
0,3756 ALL_BUT_MORT,<BMD_Analysis_Functions.Logistic object at 0x7...,True,"[-2.969372926653592, 0.0113689182549512]",104.361497,"[0.048828838920786384, 0.049982091546377054, 0...",0.697625
1,3756 ANY120,<BMD_Analysis_Functions.Logistic object at 0x7...,True,"[-2.2183177419683244, 0.006203589797364616]",122.068255,"[0.09811756756010856, 0.09930972068533303, 0.1...",0.091242
2,3756 ANY24,<BMD_Analysis_Functions.Logistic object at 0x7...,True,"[-2.458645092716963, 0.012997194293195768]",67.674525,"[0.07880864439880907, 0.08087100274074654, 0.0...",0.509724
3,3756 AXIS,<BMD_Analysis_Functions.Logistic object at 0x7...,True,"[-3.03405635563181, 0.008373382602600595]",146.856383,"[0.04591081879091692, 0.046709600844063816, 0....",0.224224
4,3756 CRAN,<BMD_Analysis_Functions.Logistic object at 0x7...,True,"[-3.03405635563181, 0.008373382602600595]",146.856383,"[0.04591081879091692, 0.046709600844063816, 0....",0.224224
5,3756 DNC_,<BMD_Analysis_Functions.Logistic object at 0x7...,True,"[-6.357087129283908, 0.015237085468314782]",274.137596,"[0.0017314085025385794, 0.0017892372548981566,...",0.439695
6,3756 DP24,<BMD_Analysis_Functions.Logistic object at 0x7...,True,"[-4.36097530445879, 0.02335290551491337]",97.799171,"[0.012605016141491655, 0.013248511342910774, 0...",0.496565
7,3756 EDEM,<BMD_Analysis_Functions.Logistic object at 0x7...,True,"[-3.03405635563181, 0.008373382602600595]",146.856383,"[0.04591081879091692, 0.046709600844063816, 0....",0.224224
8,3756 MO24,<BMD_Analysis_Functions.Logistic object at 0x7...,True,"[-2.591962288500479, 0.008191334571311938]",116.419048,"[0.06965750866618277, 0.07081289503688536, 0.0...",0.776113
9,3756 MORT,<BMD_Analysis_Functions.Logistic object at 0x7...,True,"[-2.6969406885996916, 0.005624733934389351]",180.453044,"[0.06315412056577321, 0.06387677609813608, 0.0...",0.017639


In [24]:
Model_Input = dose_response[dose_response["ids"].isin(BMD_Flags[BMD_Flags["flag"].isin([2,3,4,5])]["ids"])]
Data = Model_Input[Model_Input["ids"] == "3756 ALL_BUT_MORT"]
Data

Unnamed: 0,chemical.id,conc,endpoint,num.tot,num.nonna,num.affected,ids,frac.affected
0,3756,1e-15,ALL_BUT_MORT,36,36,2.0,3756 ALL_BUT_MORT,0.055556
18,3756,2.16,ALL_BUT_MORT,36,36,1.0,3756 ALL_BUT_MORT,0.027778
36,3756,6.8,ALL_BUT_MORT,36,36,1.0,3756 ALL_BUT_MORT,0.027778
54,3756,14.7,ALL_BUT_MORT,36,36,3.0,3756 ALL_BUT_MORT,0.083333
72,3756,31.6,ALL_BUT_MORT,36,36,2.0,3756 ALL_BUT_MORT,0.055556
90,3756,56.2,ALL_BUT_MORT,36,36,4.0,3756 ALL_BUT_MORT,0.111111
108,3756,75.0,ALL_BUT_MORT,36,36,6.0,3756 ALL_BUT_MORT,0.166667
126,3756,100.0,ALL_BUT_MORT,36,36,3.0,3756 ALL_BUT_MORT,0.083333


In [30]:
# List the ID
ID = "3756 ALL_BUT_MORT"

# Get the number of nonNA samples per dose
NonNATotals = dose_response[dose_response["ids"] == ID]["num.nonna"].tolist() 

# Calculate P-Value Function
def calc_p_value(PredictedValues, Params):
    '''Return a p-value of model fit for each unique ID and Model dataframe pairing'''

    # Get the experimental values 
    ExperimentalValues = dose_response[dose_response["ids"] == ID]["frac.affected"].tolist()

    # Now, calculate the chi squared value
    ChiSquared = ((NonNATotals / (PredictedValues * (1 - PredictedValues))) * (ExperimentalValues - PredictedValues)**2).sum()

    # Calculate a p-value of fit 
    return(stats.chi2.sf(ChiSquared, len(NonNATotals) - len(Params)))

# Run a regression model, and return the parameters, fitted values, and p_vals


## Logistic ##
logistic_model = baf.Logistic(Data[["conc", "num.affected", "num.nonna"]].astype('float').copy())
logistic_params = logistic_model.fit().params
logistic_fittedvals = baf.logistic_fun(Data[["conc"]], logistic_params)["conc"]
logistic_pval = calc_p_value(logistic_fittedvals, logistic_params)

print(logistic_params, logistic_fittedvals, logistic_pval)

[-2.96937293  0.01136892] 0      0.048829
18     0.049982
36     0.052547
54     0.057203
72     0.068490
90     0.088633
108    0.107484
126    0.137943
Name: conc, dtype: float64 0.6976250444936194


### Logistic Regression

In [6]:
# Define the universal formula used by all regression models 
mainFormula = "affected ~ conc"

# At a minimum, we need fitted values and parameters 
logistic_model = smf.glm(formula = mainFormula, data = Data, family = sm.families.Binomial())
PredictedValues = logistic_model.fit().predict()
params = logistic_model.fit().params

ID = "3756 ALL_BUT_MORT"
NonNATotals = dose_response[dose_response["ids"] == ID]["num.nonna"].tolist() 
ExperimentalValues = dose_response[dose_response["ids"] == ID]["frac.affected"].tolist()
ChiSquared = ((NonNATotals / (PredictedValues * (1 - PredictedValues))) * (ExperimentalValues - PredictedValues)**2).sum()
P_Value = stats.chi2.sf(ChiSquared, len(NonNATotals) - len(params))
print(PredictedValues, params, P_Value)

[0.04882884 0.04998209 0.05254725 0.05720272 0.06849026 0.08863314
 0.10748384 0.13794297] Intercept   -2.969373
conc         0.011369
dtype: float64 0.6976250444889138


### Gamma

In [7]:
gamma_model = smf.glm(formula = mainFormula, data = Data, family = sm.families.Gamma())
PredictedValues = gamma_model.fit().predict()
params = gamma_model.fit().params
ChiSquared = ((NonNATotals / (PredictedValues * (1 - PredictedValues))) * (ExperimentalValues - PredictedValues)**2).sum()
P_Value = stats.chi2.sf(ChiSquared, len(NonNATotals) - len(params))
print(PredictedValues, params, P_Value)

[0.05178831 0.05252504 0.05418076 0.05725354 0.06515888 0.08154912
 0.10095662 0.14769884] Intercept    19.309378
conc         -0.125388
dtype: float64 0.5738022668428853


### Weibull

In [20]:
weibull_model = baf.Weibull(Data[["conc", "num.affected", "num.nonna"]].astype('float').copy())
params = weibull_model.fit().params
PredictedValues = baf.weibull_fun(Data[["conc"]], params)["conc"]
ChiSquared = ((NonNATotals / (PredictedValues * (1 - PredictedValues))) * (ExperimentalValues - PredictedValues)**2).sum()
P_Value = stats.chi2.sf(ChiSquared, len(NonNATotals) - len(params))

print(PredictedValues, params, P_Value)

0      0.038808
18     0.043164
36     0.049809
54     0.059260
72     0.076500
90     0.098221
108    0.113264
126    0.131849
Name: conc, dtype: float64 [0.03880788 0.81086557 0.00243252] 0.6578177952143408
